From 98f1f7fc24368ab3ab87e7a35088a59509b77cd9 Mon Sep 17 00:00:00 2001
From: Viktor Barzin <vbarzin@gmail.com>
Date: Fri, 12 Jun 2026 20:41:50 +0000
Subject: [PATCH] tts: seed extension-less voice copies so tripit's bare stems
 resolve
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

First live drain failed all 27 queued narrations with 404 'Voice file
'Emily' not found': tripit's catalog sends bare stems (Emily) but the
devnen server resolves the voice as a literal filename (Emily.wav) in
predefined_voices_path then reference_audio — no stem fallback exists
upstream (HEAD == our pinned sha), and symlinks can't bridge it because
safe_resolve_within() resolves them out of the containment check.

New initContainer on the chatterbox deployment copies the 28 bundled
voices to /data/reference_audio/<stem> on the PVC (second lookup path).
Same image as the main container so no extra pull; idempotent; ~15 MB.
Verified live before committing: an extension-less copy synthesizes
200 audio/mp3 (5.3s warm) where voice=Emily 404'd.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 stacks/tts/main.tf | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
diff --git a/stacks/tts/main.tf b/stacks/tts/main.tf
index b9df17c3..5056fced 100644
--- a/stacks/tts/main.tf
+++ b/stacks/tts/main.tf
@@ -374,6 +374,36 @@ resource "kubernetes_deployment" "chatterbox" {
           name = kubernetes_secret.ghcr_credentials.metadata[0].name
         }
 
+        # tripit's voice catalog sends bare stems ("Emily"); the server resolves
+        # the voice as a LITERAL filename in predefined_voices_path, THEN in
+        # reference_audio (else 404; 2026-06-12: all 27 queued narrations failed
+        # with "Voice file 'Emily' not found"). Upstream HEAD (= our pinned sha)
+        # has no stem fallback, and symlinks can't bridge it: safe_resolve_within()
+        # .resolve()s them out of the containment check. So seed REAL
+        # extension-less copies of the bundled voices into reference_audio on the
+        # PVC (second lookup path). Same image as the main container = no extra
+        # pull; idempotent; ~15 MB once; the engine sniffs content, not extension.
+        # Copy via temp + rename so an interrupted init can't leave a truncated voice.
+        init_container {
+          name  = "seed-stem-voices"
+          image = local.image
+          command = ["sh", "-c", <<-EOC
+            set -eu
+            dst=/data/reference_audio; tmp=/data/.seed-stem-voice.tmp; mkdir -p "$dst"
+            for f in /app/voices/*.wav /app/voices/*.mp3; do
+              [ -e "$f" ] || continue
+              stem="$(basename "$f")"; stem="$${stem%.*}"
+              [ -e "$dst/$stem" ] || { cp "$f" "$tmp"; mv "$tmp" "$dst/$stem"; }
+            done
+            echo "reference_audio seeded:"; ls /data/reference_audio
+          EOC
+          ]
+          volume_mount {
+            name       = "models"
+            mount_path = "/data"
+          }
+        }
+
         container {
           name  = "chatterbox-tts"
           image = local.image