[ci skip] poison-fountain: fix single point of failure causing transient service outages

- Scale to 2 replicas with RollingUpdate (maxUnavailable=0)
- Add topology spread constraint to place pods on different nodes
- Switch from the single-threaded HTTPServer to a ThreadingMixIn-based HTTP
  server so that tarpit slow-drip requests no longer block the /auth and
  /healthz endpoints
This commit is contained in:
Viktor Barzin 2026-02-25 21:05:14 +00:00
parent 9caec43351
commit a0799d525d
No known key found for this signature in database
GPG key ID: 0EB088298288D958
2 changed files with 22 additions and 3 deletions

View file

@ -14,6 +14,7 @@ import random
import time
import hashlib
import sys
import socketserver
LISTEN_PORT = int(os.environ.get("PORT", "8080"))
CACHE_DIR = os.environ.get("CACHE_DIR", "/data/cache")
@ -163,8 +164,12 @@ class PoisonHandler(http.server.BaseHTTPRequestHandler):
pass
# HTTP server that mixes socketserver.ThreadingMixIn into http.server.HTTPServer.
# Per the commit description, this is what keeps slow-drip tarpit responses
# from blocking the /auth and /healthz endpoints.
class ThreadedHTTPServer(socketserver.ThreadingMixIn, http.server.HTTPServer):
# ThreadingMixIn setting: run handler threads as daemon threads.
# NOTE(review): per the socketserver docs this means the process does not wait
# for in-flight handler threads on exit -- confirm that is the intent here.
daemon_threads = True
# Script entry point: create CACHE_DIR if it is missing, bind the server to
# all interfaces ("0.0.0.0") on LISTEN_PORT, announce the port, and serve
# requests until the process is stopped.
if __name__ == "__main__":
os.makedirs(CACHE_DIR, exist_ok=True)
# NOTE(review): the two `server = ...` lines below are the removed and the
# added side of the same diff line (this view has lost its +/- markers).
# Only the ThreadedHTTPServer assignment is expected in the resulting file.
server = http.server.HTTPServer(("0.0.0.0", LISTEN_PORT), PoisonHandler)
server = ThreadedHTTPServer(("0.0.0.0", LISTEN_PORT), PoisonHandler)
# flush=True so the startup line is emitted immediately rather than buffered.
print(f"Poison Fountain service listening on :{LISTEN_PORT}", flush=True)
server.serve_forever()

View file

@ -54,9 +54,13 @@ resource "kubernetes_deployment" "poison_fountain" {
}
spec {
replicas = 1
replicas = 2
strategy {
type = "Recreate"
type = "RollingUpdate"
rolling_update {
max_unavailable = 0
max_surge = 1
}
}
selector {
match_labels = {
@ -70,6 +74,16 @@ resource "kubernetes_deployment" "poison_fountain" {
}
}
spec {
topology_spread_constraint {
max_skew = 1
topology_key = "kubernetes.io/hostname"
when_unsatisfiable = "DoNotSchedule"
label_selector {
match_labels = {
app = "poison-fountain"
}
}
}
container {
name = "poison-fountain"
image = "python:3.12-slim"