infra/stacks/tts/main.tf
Viktor Barzin bd0cb71f17
All checks were successful
ci/woodpecker/push/default Pipeline was successful
ci/woodpecker/push/build-cli Pipeline was successful
tts: TCP probes — http liveness killed the server mid-synthesis
The devnen server runs chunked synthesis as a blocking call inside its
async handler, so the event loop (and every HTTP probe) hangs for the
whole multi-minute story. Kubelet's http liveness probe (1s timeout)
then killed the container mid-story (exit 137, twice within 10 min of
the first real drain), which reset the engine, so every following pass
started cold and tripit's 120s synthesis budget could never be met —
the queue would never drain.

TCP probes keep the meaning that matters: uvicorn binds 8004 only
after the model finishes loading in the lifespan hook, so readiness
still gates 'model loaded', while a GPU-busy server is left alive.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-12 20:57:28 +00:00

647 lines
26 KiB
HCL
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

variable "image_tag" {
  description = "chatterbox-tts GHCR image tag (devnen upstream short sha)."
  type        = string

  # Tracks the devnen upstream sha that the GHA build (tripit
  # .github/workflows/build-chatterbox.yml) was dispatched against. Do NOT use
  # :cu128 or :latest: the original Forgejo-registry push cannot be pulled
  # (corrupt layer blob, 500 on HEAD), which is also the reason the image
  # moved to GHCR.
  default = "915ae289"
}
# ─────────────────────────────────────────────────────────────────────────────
# Option-A off-peak control (see docs/plans/2026-06-08-chatterbox-tts-infra.md §3).
# The Deployment sits at replicas=0; a CronJob scales it to 1 at the window start
# ONLY IF a free-VRAM preflight passes, and another scales it back to 0 at window
# end. A guard CronJob yields the card mid-window if free VRAM drops below the
# floor (a resident woke up). tripit's bake is best-effort + idempotent, so a
# skipped/aborted window simply backfills on the next one (ADR-0002/0004).
# ─────────────────────────────────────────────────────────────────────────────
variable "vram_free_floor_bytes" {
  description = "Minimum free GPU VRAM (bytes) required before scaling Chatterbox up; guard yields below it."
  type        = number

  # OPEN ITEM: this value still has to be measured (§5 smoke test / §3.X).
  # It serves two roles: the free-VRAM minimum the preflight demands before it
  # scales Chatterbox up, and the threshold below which the guard yields.
  # The 6 GiB default is a conservative guess: ~4 GiB estimated FP16 peak for
  # chatterbox-multilingual plus ~2 GiB headroom for the read→cudaMalloc race.
  # Raise or lower it once the real T4 peak has been captured from
  # gpu_pod_memory_used_bytes{namespace="tts"} during a real synth.
  default = 6442450944
}
variable "gpu_total_bytes" {
  description = "Total VRAM on the shared GPU. Free = this minus sum(gpu_pod_memory_used_bytes)."
  type        = number

  # 16 GiB expressed in bytes (Tesla T4).
  default = 17179869184
}
variable "offpeak_window_up_schedule" {
  description = "Cron schedule that fires the free-VRAM preflight + scale-up at window start."
  type        = string

  # 02:00; evaluated in Europe/London via the timezone set on the CronJob.
  default = "0 2 * * *"
}
variable "offpeak_window_down_schedule" {
  description = "Cron schedule that scales Chatterbox back to 0 at window end."
  type        = string

  # 06:00 Europe/London.
  default = "0 6 * * *"
}
variable "offpeak_guard_schedule" {
  description = "Cron schedule for the guard that yields the card if free VRAM drops below the floor."
  type        = string

  # Runs ALL DAY since the demand gate landed (2026-06-12): live synthesis may
  # occupy the card at any hour, so the yield-on-VRAM-pressure guard has to
  # watch around the clock as well. A tick while replicas=0 is a no-op.
  default = "*/5 * * * *"
}
locals {
  namespace = "tts"
  labels    = { app = "chatterbox-tts" }
  image     = "ghcr.io/viktorbarzin/chatterbox-tts:${var.image_tag}"

  # config.yaml rendered into a ConfigMap, mounted at /app/config.yaml (the
  # server's WORKDIR is /app). Reference audio and the HF model cache live on
  # the NFS-SSD PVC (mounted at /data) so weights persist across restarts and
  # load fast; predefined voices come from the image (see tts_engine below).
  # server.port stays at the devnen default 8004; the Service remaps
  # 8000->8004 so tripit's default TTS_BASE_URL works unchanged.
  #
  # model.repo_id = chatterbox-multilingual (ADR-0004; 23 languages for
  # worldwide place-names). If the measured T4 VRAM peak is too high to coexist
  # even off-peak, fall back to "chatterbox" (English, lighter) — a one-line
  # change here (§3.X / §6 decision 3).
  chatterbox_config = yamlencode({
    server = {
      host = "0.0.0.0"
      port = 8004
    }
    model = {
      repo_id = "chatterbox-multilingual"
    }
    tts_engine = {
      device = "cuda"
      # Predefined voices come from the IMAGE's bundled set (28 reference WAVs
      # under the devnen server's /app/voices) rather than the NFS PVC: nobody
      # can seed /data/voices without NFS-host shell access, and an empty
      # predefined dir means /v1/audio/voices serves nothing. (That endpoint
      # was described as gating the readiness probe; the Deployment's probes
      # are TCP now, so it no longer affects readiness.) tripit's Voice
      # catalog (tripit#30) names a subset of these stems. /data keeps
      # reference_audio (future cloning) + HF cache.
      predefined_voices_path = "/app/voices"
      reference_audio_path   = "/data/reference_audio"
    }
  })

  # Shared script for the off-peak CronJobs. Reads the in-cluster
  # gpu_pod_memory_used_bytes gauge (the per-namespace gauge the 2026-06-02
  # post-mortem built — host-PID attribution, no new exporter needed), sums it,
  # and computes free = GPU_TOTAL - used. Needs only sh arithmetic, awk, sed,
  # curl and kubectl, all of which must exist in the CronJob container image.
  # ACTION is "up" | "down" | "guard" | "demand".
  #   up     — scale to 1 ONLY IF free >= FLOOR (positive admission).
  #   guard  — scale to 0 IF free < FLOOR (a resident woke up; yield).
  #   down   — scale to 0 unconditionally (window end).
  #   demand — scale to 1 if tripit's queue is non-empty, replicas=0 and
  #            free >= FLOOR; scale to 0 if the queue is empty and replicas!=0.
  #   other  — hard error (exit 2) so a misconfigured CronJob shows up as a
  #            failed Job instead of a silent no-op.
  #
  # NOTE(review): USED sums every pod's gauge, so once Chatterbox is running
  # its own VRAM counts against FREE. The guard therefore sees a smaller FREE
  # than the up/demand preflight saw for the same co-tenant load — confirm
  # this cannot flap (guard scales down, demand scales back up) when
  # co-tenant usage sits between the two effective thresholds.
  #
  # Heredoc escaping: only `$${...}` (literal `${...}`) is escaped — Terraform
  # would otherwise try to interpolate it. Bare `$(...)`, `$((...))` and awk's
  # `$NF` are literal `$` and pass through unescaped.
  vram_gate_script = <<-EOT
    set -eu
    : "$${ACTION:?}" "$${FLOOR:?}" "$${GPU_TOTAL:?}"
    METRICS_URL="http://gpu-pod-exporter.nvidia.svc.cluster.local:80/metrics"
    # Sum gpu_pod_memory_used_bytes across all pods. Missing metric / empty
    # scrape => used=0 (card idle). -f so a non-200 scrape is a hard error we
    # treat conservatively (skip scale-up).
    if ! BODY="$(curl -sf -m 10 "$${METRICS_URL}")"; then
      echo "WARN: could not scrape $${METRICS_URL}"
      if [ "$${ACTION}" = "up" ] || [ "$${ACTION}" = "demand" ]; then
        echo "$${ACTION}: scrape failed -> NOT scaling up (fail-safe)"; exit 0
      fi
      # For down/guard a failed scrape must NOT block yielding the card.
      BODY=""
    fi
    USED="$(printf '%s\n' "$${BODY}" \
      | awk '/^gpu_pod_memory_used_bytes\{/ { s += $NF } END { printf "%d", s }')"
    USED="$${USED:-0}"
    FREE="$(( GPU_TOTAL - USED ))"
    echo "GPU VRAM: used=$${USED} free=$${FREE} floor=$${FLOOR} (total=$${GPU_TOTAL})"
    case "$${ACTION}" in
      up)
        if [ "$${FREE}" -ge "$${FLOOR}" ]; then
          echo "preflight PASS: free >= floor -> scaling chatterbox-tts to 1"
          kubectl -n tts scale deploy/chatterbox-tts --replicas=1
        else
          echo "preflight SKIP: free < floor -> leaving chatterbox-tts at 0 (retry next window)"
        fi
        ;;
      guard)
        if [ "$${FREE}" -lt "$${FLOOR}" ]; then
          echo "guard TRIP: free < floor -> yielding the card, scaling chatterbox-tts to 0"
          kubectl -n tts scale deploy/chatterbox-tts --replicas=0
        else
          echo "guard OK: free >= floor -> chatterbox-tts may keep running"
        fi
        ;;
      down)
        echo "window end -> scaling chatterbox-tts to 0"
        kubectl -n tts scale deploy/chatterbox-tts --replicas=0
        ;;
      demand)
        # GPU-gated LIVE narration (tripit#24 amendment, 2026-06-12): scale up
        # whenever tripit has audio waiting AND the card has room; idle back
        # down when the queue empties (even inside the nightly window — done is
        # done, free the card early). The 02:00 window-up stays the guaranteed
        # nightly catch-up for days the daytime card never had room.
        # A FAILED probe must not read as "queue empty": defaulting to 0 idled
        # the deployment the very minute it first went Ready (2026-06-12 20:30
        # UTC — one transient curl failure, 27 items still queued). Fail-safe
        # is NO ACTION; worst case a stale-up deployment idles until the 06:00
        # window-down. (This also covers a 404 from an older tripit image.)
        if ! QBODY="$(curl -sf -m 10 "$${QUEUE_URL}")"; then
          echo "demand: queue probe failed -> no action (fail-safe)"; exit 0
        fi
        QUEUED="$(printf '%s\n' "$${QBODY}" \
          | sed -n 's/.*"queued"[^0-9]*\([0-9][0-9]*\).*/\1/p')"
        if [ -z "$${QUEUED}" ]; then
          echo "demand: unparseable queue response -> no action (fail-safe)"; exit 0
        fi
        REPLICAS="$(kubectl -n tts get deploy/chatterbox-tts -o jsonpath='{.spec.replicas}')"
        echo "demand: queued=$${QUEUED} replicas=$${REPLICAS}"
        if [ "$${QUEUED}" -gt 0 ] && [ "$${REPLICAS}" = "0" ]; then
          if [ "$${FREE}" -ge "$${FLOOR}" ]; then
            echo "demand: audio waiting + room on the card -> scaling chatterbox-tts to 1"
            kubectl -n tts scale deploy/chatterbox-tts --replicas=1
          else
            echo "demand: audio waiting but free < floor -> staying down (nightly window catches up)"
          fi
        elif [ "$${QUEUED}" -eq 0 ] && [ "$${REPLICAS}" != "0" ]; then
          echo "demand: queue empty -> idling chatterbox-tts back to 0"
          kubectl -n tts scale deploy/chatterbox-tts --replicas=0
        fi
        ;;
      *)
        # An ACTION outside the known set used to fall through the case and
        # exit 0, so a typo in the CronJob map looked like a healthy run.
        echo "ERROR: unknown ACTION '$${ACTION}' (expected up|guard|down|demand)" >&2
        exit 2
        ;;
    esac
    EOT

  # Common spec for the four off-peak CronJobs. Each runs one bitnami/kubectl
  # pod (in-cluster SA, no kubeconfig) executing the shared gate script with a
  # different ACTION. timezone pins the window to Europe/London regardless of
  # node TZ.
  offpeak_cronjobs = {
    chatterbox-window-up = {
      schedule = var.offpeak_window_up_schedule
      action   = "up"
    }
    chatterbox-window-down = {
      schedule = var.offpeak_window_down_schedule
      action   = "down"
    }
    chatterbox-vram-guard = {
      schedule = var.offpeak_guard_schedule
      action   = "guard"
    }
    # GPU-gated live narration: every 3 min, scale up when tripit's audio queue
    # is non-empty and the VRAM preflight passes; idle down when it empties.
    chatterbox-demand-gate = {
      schedule = "*/3 * * * *"
      action   = "demand"
    }
  }

  # tripit's unauthenticated in-cluster queue probe (count only, non-sensitive).
  # Probe failures (incl. a 404 from an older tripit image) make the demand
  # gate take NO action — only an explicit parsed count scales anything.
  tripit_queue_url = "http://tripit.tripit.svc.cluster.local:8080/api/tour/tts-queue"
}
# Namespace that holds every Chatterbox TTS object in this stack.
resource "kubernetes_namespace" "tts" {
  metadata {
    name = local.namespace

    labels = {
      "keel.sh/enrolled" = "true"
      "istio-injection"  = "disabled"
      tier               = local.tiers.gpu
    }
  }

  lifecycle {
    # Leave the goldilocks VPA update-mode label alone when it differs from
    # this config (presumably it is set out-of-band — confirm).
    ignore_changes = [metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"]]
  }
}
# NFS-SSD backed volume for model weights and voice data (fast load). It is
# RWX so that a seed Job or `kubectl cp` can write voice / narrator reference
# files while the Deployment has it mounted. Host path on the Proxmox box:
# /srv/nfs-ssd/chatterbox. Same pattern as llama-cpp's nfs_models. The first
# start downloads the model into /data/hf_cache (HF_HOME on the container),
# so weights survive pod restarts.
module "nfs_models" {
  source = "../../modules/kubernetes/nfs_volume"

  name      = "chatterbox-models"
  namespace = kubernetes_namespace.tts.metadata[0].name

  nfs_server = "192.168.1.127"
  nfs_path   = "/srv/nfs-ssd/chatterbox"

  # Room for multilingual weights + HF cache + voices headroom.
  storage = "20Gi"
}
# One-shot bootstrap for the PVC's backing directory. /srv/nfs-ssd is exported
# as a whole tree, but the chatterbox SUBDIR was never created on the host (a
# manual go-live step that nobody with NFS-host shell access ran), so kubelet's
# subdir mount kept failing with exit 32 (first seen in the first window,
# 2026-06-12). This Job mounts the export ROOT — which does exist — and
# creates the subtree; kubelet's periodic mount retry then lets the chatterbox
# pod recover by itself. `mkdir -p` is idempotent; the Job is
# immutable-once-created.
resource "kubernetes_job" "models_dir_init" {
  wait_for_completion = true

  timeouts { create = "3m" }

  metadata {
    name      = "chatterbox-models-dir-init"
    namespace = kubernetes_namespace.tts.metadata[0].name
    labels    = local.labels
  }

  spec {
    backoff_limit              = 3
    ttl_seconds_after_finished = 86400

    template {
      metadata { labels = local.labels }

      spec {
        restart_policy = "Never"

        # The export root, not the (possibly missing) chatterbox subdirectory.
        volume {
          name = "nfs-ssd-root"
          nfs {
            server = "192.168.1.127"
            path   = "/srv/nfs-ssd"
          }
        }

        container {
          name    = "mkdir"
          image   = "busybox:1.37"
          command = ["sh", "-c", "mkdir -p /mnt/chatterbox/hf_cache /mnt/chatterbox/reference_audio && ls -la /mnt/chatterbox"]

          volume_mount {
            name       = "nfs-ssd-root"
            mount_path = "/mnt"
          }
        }
      }
    }
  }
}
# Vault KV entry that supplies the GitHub PAT for the pull secret defined
# next. The ghcr.io/viktorbarzin/chatterbox-tts image is PRIVATE (built
# off-infra by tripit's build-chatterbox.yml GHA workflow; the Forgejo registry
# copy is unpullable because of a corrupt layer blob). Mirrors stacks/tripit's
# ghcr secret.
data "vault_kv_secret_v2" "viktor" {
  name  = "viktor"
  mount = "secret"
}
# dockerconfigjson pull secret for ghcr.io, built from the `github_pat` key of
# the Vault entry read above.
resource "kubernetes_secret" "ghcr_credentials" {
  type = "kubernetes.io/dockerconfigjson"

  metadata {
    name      = "ghcr-credentials"
    namespace = kubernetes_namespace.tts.metadata[0].name
  }

  data = {
    ".dockerconfigjson" = jsonencode({
      auths = {
        "ghcr.io" = {
          username = "ViktorBarzin"
          password = data.vault_kv_secret_v2.viktor.data["github_pat"]
          # `auth` is base64("<username>:<password>").
          auth = base64encode("ViktorBarzin:${data.vault_kv_secret_v2.viktor.data["github_pat"]}")
        }
      }
    })
  }
}
# Carries the rendered server config (local.chatterbox_config) as config.yaml;
# the Deployment mounts that key at /app/config.yaml.
resource "kubernetes_config_map" "chatterbox_config" {
  metadata {
    name      = "chatterbox-config"
    namespace = kubernetes_namespace.tts.metadata[0].name
    labels    = local.labels
  }

  data = {
    "config.yaml" = local.chatterbox_config
  }
}
# Single Deployment running the devnen Chatterbox-TTS-Server (OpenAI-compatible
# /v1/audio/speech). Sits at replicas=0 — the off-peak CronJobs scale it to 1
# only when the free-VRAM preflight passes (Option A), and back to 0 at window
# end. wait_for_rollout=false so apply never blocks on a pod that is
# intentionally scaled to 0.
resource "kubernetes_deployment" "chatterbox" {
  metadata {
    name      = "chatterbox-tts"
    namespace = kubernetes_namespace.tts.metadata[0].name
    labels    = merge(local.labels, { tier = local.tiers.gpu })
  }

  wait_for_rollout = false

  spec {
    # Off-peak control owns the replica count at runtime (CronJobs scale 0<->1).
    # Declare 0 here so a plain `tg apply` outside the window doesn't wake the
    # card. ignore_changes on replicas (below) stops apply from fighting the
    # CronJob's scale.
    replicas = 0

    strategy { type = "Recreate" }

    selector {
      match_labels = { app = "chatterbox-tts" }
    }

    template {
      metadata {
        labels = { app = "chatterbox-tts" }
        annotations = {
          # Changes whenever the rendered config changes, which alters the pod
          # template and so rolls the pod.
          "checksum/config" = sha256(local.chatterbox_config)
        }
      }

      spec {
        node_selector = { "nvidia.com/gpu.present" = "true" }

        toleration {
          key      = "nvidia.com/gpu"
          operator = "Equal"
          value    = "true"
          effect   = "NoSchedule"
        }

        # C-hardening (§3.RECOMMENDATION.3): Chatterbox is a polite, best-effort
        # batch tenant — give it the regular tier-2-gpu priority (600000) so it
        # is ALWAYS the pod evicted under GPU-node pressure, never immich-ml /
        # frigate / llama-swap. This relies on the `tts` namespace being EXCLUDED
        # from the Kyverno `inject-gpu-workload-priority` policy (which would
        # otherwise stamp the immich-equal gpu-workload=1,200,000 priority on any
        # nvidia.com/gpu pod). That exclusion is the two-line edit to the kyverno
        # stack flagged in the PR. Without it, this priority_class_name is
        # overwritten on pod CREATE and Chatterbox would compete as an equal.
        priority_class_name = "tier-2-gpu"

        image_pull_secrets { name = "registry-credentials" }
        image_pull_secrets {
          name = kubernetes_secret.ghcr_credentials.metadata[0].name
        }

        # tripit's voice catalog sends bare stems ("Emily"); the server resolves
        # the voice as a LITERAL filename in predefined_voices_path THEN in
        # reference_audio (404 otherwise, observed 2026-06-12: all 27 queued
        # narrations failed with "Voice file 'Emily' not found"). Upstream HEAD
        # (= our pinned sha) has no stem fallback, and symlinks can't bridge it
        # because safe_resolve_within() .resolve()s them out of the containment
        # check. So seed REAL extension-less copies of the bundled voices into
        # reference_audio on the PVC (the second lookup path). Same image as the
        # main container = no extra pull; idempotent; ~15 MB once. The engine
        # sniffs audio content, not extensions.
        init_container {
          name  = "seed-stem-voices"
          image = local.image
          command = ["sh", "-c", <<-EOC
            set -eu
            mkdir -p /data/reference_audio
            for f in /app/voices/*.wav /app/voices/*.mp3; do
            [ -e "$f" ] || continue
            stem="$(basename "$f")"; stem="$${stem%.*}"
            [ -e "/data/reference_audio/$stem" ] || cp "$f" "/data/reference_audio/$stem"
            done
            echo "reference_audio seeded:"; ls /data/reference_audio
            EOC
          ]
          volume_mount {
            name       = "models"
            mount_path = "/data"
          }
        }

        container {
          name  = "chatterbox-tts"
          image = local.image

          port {
            container_port = 8004
            name           = "http"
          }

          # T4 is Turing — NO bf16 (ADR-0004). Pin off; run FP16/FP32.
          env {
            name  = "TTS_BF16"
            value = "off"
          }
          # Park the HuggingFace cache on the NFS-SSD PVC so model weights
          # download once and persist across pod restarts (the pod is recreated
          # every window). The devnen compose mounts HF cache at /app/hf_cache;
          # point HF_HOME at the PVC instead.
          env {
            name  = "HF_HOME"
            value = "/data/hf_cache"
          }
          env {
            name  = "HF_HUB_CACHE"
            value = "/data/hf_cache"
          }

          volume_mount {
            name       = "config"
            mount_path = "/app/config.yaml"
            sub_path   = "config.yaml"
          }
          volume_mount {
            name       = "models"
            mount_path = "/data"
          }

          # TCP probes, deliberately NOT http: the server synthesizes chunks
          # as a BLOCKING call inside its async handler, so the event loop —
          # and any HTTP probe — hangs for the whole multi-minute story. The
          # http liveness probe killed the container mid-synthesis (exit 137,
          # observed 2026-06-12 20:48–20:53: every drain pass then faced a
          # cold engine and timed out forever). TCP keeps the original
          # semantics where it matters: uvicorn only binds 8004 AFTER the
          # lifespan hook finishes loading the model ("Application startup
          # complete" precedes "Uvicorn running"), so a TCP readiness pass
          # still means "model loaded", while a GPU-busy server stays alive.

          # Startup budget. Because the port is bound only after the model has
          # loaded, the liveness probe alone (120s delay + 5 failures x 30s)
          # would kill the container after roughly four minutes of loading —
          # too tight for a first start that has to download the weights into
          # the HF cache. Liveness and readiness are held off until this probe
          # succeeds; it allows up to 90 x 10s = 15 minutes.
          startup_probe {
            tcp_socket {
              port = 8004
            }
            period_seconds    = 10
            failure_threshold = 90
          }

          readiness_probe {
            tcp_socket {
              port = 8004
            }
            initial_delay_seconds = 20
            period_seconds        = 15
            failure_threshold     = 12
          }

          liveness_probe {
            tcp_socket {
              port = 8004
            }
            initial_delay_seconds = 120
            period_seconds        = 30
            failure_threshold     = 5
          }

          resources {
            requests = {
              cpu    = "200m"
              memory = "2Gi"
            }
            limits = {
              memory           = "8Gi"
              "nvidia.com/gpu" = "1" # ONE time-slice (operator advertises 100), NOT the whole card
            }
          }
        }

        volume {
          name = "config"
          config_map {
            name = kubernetes_config_map.chatterbox_config.metadata[0].name
          }
        }
        volume {
          name = "models"
          persistent_volume_claim {
            claim_name = module.nfs_models.claim_name
          }
        }
      }
    }
  }

  lifecycle {
    ignore_changes = [
      # Off-peak CronJobs own the replica count — don't let apply reset it.
      spec[0].replicas,
      spec[0].template[0].spec[0].dns_config, # KYVERNO_LIFECYCLE_V1
      # image is TF-OWNED (pinned GHCR sha tag), NOT keel-managed: keel can't
      # poll the private GHCR repo, and the 2026-06-12 registry switch must apply.
      metadata[0].annotations["keel.sh/match-tag"],
      metadata[0].annotations["keel.sh/policy"],
      metadata[0].annotations["keel.sh/trigger"],
      metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2
      metadata[0].annotations["kubernetes.io/change-cause"],
      metadata[0].annotations["deployment.kubernetes.io/revision"],
      spec[0].template[0].metadata[0].annotations["keel.sh/update-time"],
    ]
  }
}
# In-cluster Service in front of the Chatterbox pod. Exposes port 8000 and
# forwards to the container's 8004.
resource "kubernetes_service" "chatterbox" {
  metadata {
    name      = "chatterbox-tts"
    namespace = kubernetes_namespace.tts.metadata[0].name
    labels    = local.labels

    # Annotation-based Prometheus scrape (mirrors tripit). The devnen server
    # exposes no /metrics; this watches liveness through the blackbox path and
    # keeps the Service in the scrape set should /metrics be added later.
    annotations = {
      "prometheus.io/scrape" = "true"
      "prometheus.io/port"   = "8000"
      "prometheus.io/path"   = "/v1/audio/voices"
    }
  }

  spec {
    # In-cluster only — never ingressed (no token needed).
    type     = "ClusterIP"
    selector = { app = "chatterbox-tts" }

    port {
      name        = "http"
      port        = 8000 # the port tripit's default TTS_BASE_URL uses
      target_port = 8004 # where the devnen server actually listens
    }
  }
}
#
# Option-A off-peak control: SA + Role (scale the Deployment) + RoleBinding +
# four CronJobs (window-up preflight, VRAM guard, window-down, demand gate).
# Mirrors the nextcloud-watchdog in-cluster-kubectl pattern (SA → Role →
# bitnami/kubectl CronJob, no kubeconfig).
#
# Identity the off-peak CronJob pods run as.
resource "kubernetes_service_account" "offpeak" {
  metadata {
    namespace = kubernetes_namespace.tts.metadata[0].name
    name      = "chatterbox-offpeak"
  }
}
# Namespaced permissions for the gate script.
resource "kubernetes_role" "offpeak" {
  metadata {
    namespace = kubernetes_namespace.tts.metadata[0].name
    name      = "chatterbox-offpeak"
  }

  # get + patch on Deployments and on their scale subresource: the gate reads
  # .spec.replicas and runs `kubectl scale`.
  rule {
    verbs      = ["get", "patch"]
    resources  = ["deployments", "deployments/scale"]
    api_groups = ["apps"]
  }
}
# Grants the chatterbox-offpeak Role to the chatterbox-offpeak ServiceAccount.
resource "kubernetes_role_binding" "offpeak" {
  metadata {
    namespace = kubernetes_namespace.tts.metadata[0].name
    name      = "chatterbox-offpeak"
  }

  subject {
    kind      = "ServiceAccount"
    namespace = kubernetes_namespace.tts.metadata[0].name
    name      = kubernetes_service_account.offpeak.metadata[0].name
  }

  role_ref {
    kind      = "Role"
    name      = kubernetes_role.offpeak.metadata[0].name
    api_group = "rbac.authorization.k8s.io"
  }
}
# One CronJob per entry in local.offpeak_cronjobs. Every instance runs the same
# gate script (local.vram_gate_script); only the schedule and the ACTION
# environment variable differ.
resource "kubernetes_cron_job_v1" "offpeak" {
  for_each = local.offpeak_cronjobs

  metadata {
    name      = each.key
    namespace = kubernetes_namespace.tts.metadata[0].name
    labels    = local.labels
  }

  spec {
    # Schedules are evaluated in Europe/London.
    timezone = "Europe/London"
    schedule = each.value.schedule

    # Never run two ticks of the same gate at once.
    concurrency_policy        = "Forbid"
    starting_deadline_seconds = 120

    failed_jobs_history_limit     = 3
    successful_jobs_history_limit = 1

    job_template {
      metadata { labels = local.labels }

      spec {
        active_deadline_seconds    = 120
        backoff_limit              = 1
        ttl_seconds_after_finished = 300

        template {
          metadata { labels = local.labels }

          spec {
            restart_policy       = "Never"
            service_account_name = kubernetes_service_account.offpeak.metadata[0].name

            container {
              name = "vram-gate"
              # NOTE(review): `latest` is a floating tag, so the kubectl/curl/awk
              # versions the script runs against can change between runs —
              # consider pinning.
              image   = "bitnami/kubectl:latest"
              command = ["/bin/bash", "-c", local.vram_gate_script]

              # Script inputs. ACTION, FLOOR and GPU_TOTAL are required by the
              # script; QUEUE_URL is read by the "demand" action only.
              env {
                name  = "ACTION"
                value = each.value.action
              }
              env {
                name  = "FLOOR"
                value = tostring(var.vram_free_floor_bytes)
              }
              env {
                name  = "GPU_TOTAL"
                value = tostring(var.gpu_total_bytes)
              }
              env {
                name  = "QUEUE_URL"
                value = local.tripit_queue_url
              }

              resources {
                limits   = { memory = "128Mi" }
                requests = { cpu = "20m", memory = "64Mi" }
              }
            }
          }
        }
      }
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: Kyverno mutates dns_config with ndots=2 on CronJobs.
    ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
  }
}
# Apply trigger 2026-06-11 (tripit#26): the previous push was a merge commit, so
# the changed-stack detector (git diff HEAD~1 HEAD = first-parent diff) missed
# stacks/tts entirely. Non-merge commit so the diff names this stack.