diff --git a/stacks/tts/main.tf b/stacks/tts/main.tf
index 609e49b1..0f0bd215 100644
--- a/stacks/tts/main.tf
+++ b/stacks/tts/main.tf
@@ -44,9 +44,12 @@ variable "offpeak_window_down_schedule" {
 }
 
 variable "offpeak_guard_schedule" {
-  type        = string
-  default     = "*/5 2-5 * * *" # every 5 min inside the 02:00–06:00 window
-  description = "Cron schedule for the mid-window guard that yields the card if free VRAM drops."
+  type = string
+  # Runs every 5 minutes around the clock. Since the demand gate (2026-06-12),
+  # live synthesis can hold the card at any hour, so the yield-on-VRAM-pressure
+  # guard must watch at every hour too. A guard tick while replicas=0 is a no-op.
+  default     = "*/5 * * * *"
+  description = "Cron schedule for the guard that yields the card if free VRAM drops below the floor."
 }
 
 locals {
@@ -106,8 +109,8 @@ locals {
     # treat conservatively (skip scale-up).
     if ! BODY="$(curl -sf -m 10 "$${METRICS_URL}")"; then
       echo "WARN: could not scrape $${METRICS_URL}"
-      if [ "$${ACTION}" = "up" ]; then
-        echo "preflight: scrape failed -> NOT scaling up (fail-safe)"; exit 0
+      if [ "$${ACTION}" = "up" ] || [ "$${ACTION}" = "demand" ]; then
+        echo "$${ACTION}: scrape failed -> NOT scaling up (fail-safe)"; exit 0
       fi
       # For down/guard a failed scrape must NOT block yielding the card.
       BODY=""
@@ -139,6 +142,29 @@ locals {
         echo "window end -> scaling chatterbox-tts to 0"
         kubectl -n tts scale deploy/chatterbox-tts --replicas=0
         ;;
+      demand)
+        # GPU-gated LIVE narration (tripit#24 amendment, 2026-06-12): scale up
+        # when tripit has audio queued AND free VRAM >= floor; scale back to 0
+        # once the queue is empty (even inside the nightly window). A failed or
+        # unparsable queue probe is NOT "empty": the gate exits without scaling,
+        # so a tripit outage/404 cannot shut down the 02:00 nightly catch-up.
+        QUEUED="$(curl -sf -m 10 "$${QUEUE_URL}" \
+          | sed -n 's/.*"queued"[^0-9]*\([0-9][0-9]*\).*/\1/p')" || QUEUED=""
+        if [ -z "$${QUEUED}" ]; then echo "demand: queue probe failed -> no-op (fail-safe)"; exit 0; fi
+        REPLICAS="$(kubectl -n tts get deploy/chatterbox-tts -o jsonpath='{.spec.replicas}')"
+        echo "demand: queued=$${QUEUED} replicas=$${REPLICAS}"
+        if [ "$${QUEUED}" -gt 0 ] && [ "$${REPLICAS}" = "0" ]; then
+          if [ "$${FREE}" -ge "$${FLOOR}" ]; then
+            echo "demand: audio waiting + room on the card -> scaling chatterbox-tts to 1"
+            kubectl -n tts scale deploy/chatterbox-tts --replicas=1
+          else
+            echo "demand: audio waiting but free < floor -> staying down (nightly window catches up)"
+          fi
+        elif [ "$${QUEUED}" -eq 0 ] && [ "$${REPLICAS}" != "0" ]; then
+          echo "demand: queue empty -> idling chatterbox-tts back to 0"
+          kubectl -n tts scale deploy/chatterbox-tts --replicas=0
+        fi
+        ;;
     esac
   EOT
 
@@ -159,7 +185,17 @@ locals {
       schedule = var.offpeak_guard_schedule
       action   = "guard"
     }
+    # GPU-gated live narration: every 3 min, scale up when tripit's audio queue
+    # is non-empty and the VRAM preflight passes; idle down when it empties.
+    chatterbox-demand-gate = {
+      schedule = "*/3 * * * *"
+      action   = "demand"
+    }
   }
+
+  # tripit's unauthenticated in-cluster queue probe (count only, non-sensitive).
+  # A 404 from an older tripit image yields no queued count, so the gate never scales up.
+  tripit_queue_url = "http://tripit.tripit.svc.cluster.local:8080/api/tour/tts-queue"
 }
 
 resource "kubernetes_namespace" "tts" {
@@ -463,6 +499,10 @@ resource "kubernetes_cron_job_v1" "offpeak" {
                 name  = "GPU_TOTAL"
                 value = tostring(var.gpu_total_bytes)
               }
+              env {
+                name  = "QUEUE_URL"
+                value = local.tripit_queue_url
+              }
               resources {
                 requests = { cpu = "20m", memory = "64Mi" }
                 limits   = { memory = "128Mi" }