diff --git a/stacks/tts/main.tf b/stacks/tts/main.tf
index 5056fced..e8c4d76e 100644
--- a/stacks/tts/main.tf
+++ b/stacks/tts/main.tf
@@ -440,12 +440,18 @@ resource "kubernetes_deployment" "chatterbox" {
             mount_path = "/data"
           }
 
-          # /v1/audio/voices is cheap and only 200s once the model is loaded —
-          # so it gates real readiness. First start downloads the model, which
-          # is slow; the generous failure_threshold absorbs that.
+          # TCP probes, deliberately NOT http: the server synthesizes chunks
+          # as a BLOCKING call inside its async handler, so the event loop —
+          # and any HTTP probe — hangs for the whole multi-minute story. The
+          # http liveness probe killed the container mid-synthesis (exit 137,
+          # observed 2026-06-12 20:48–20:53); every drain pass after that hit a
+          # cold engine and timed out, indefinitely. TCP keeps the original
+          # semantics where it matters: uvicorn (single-process, no --workers)
+          # binds 8004 only AFTER the lifespan hook has loaded the model
+          # ("Application startup complete" precedes "Uvicorn running"), so TCP
+          # readiness still means "model loaded"; a GPU-busy server stays alive.
           readiness_probe {
-            http_get {
-              path = "/v1/audio/voices"
+            tcp_socket {
               port = 8004
             }
             initial_delay_seconds = 20
@@ -453,8 +459,7 @@ resource "kubernetes_deployment" "chatterbox" {
             failure_threshold     = 12
           }
           liveness_probe {
-            http_get {
-              path = "/v1/audio/voices"
+            tcp_socket {
               port = 8004
             }
             initial_delay_seconds = 120