infra/stacks/portal-tts/main.tf

204 lines
8.2 KiB
Terraform
Raw Normal View History

2026-06-17 20:25:29 +00:00
# =============================================================================
# portal-tts — edge-tts (CPU, always-on) for the portal-assistant Gateway
# =============================================================================
#
# WHAT: a single ALWAYS-ON openai-edge-tts deployment (travisvn/openai-edge-tts),
# an OpenAI-compatible /v1/audio/speech proxy over Microsoft edge-tts neural
# voices, serving Bulgarian (bg-BG-KalinaNeural) AND English (en-US-AvaNeural),
# the voice chosen PER REQUEST by the Gateway, behind a ClusterIP Service
# `portal-tts.portal-tts.svc:8000`. CPU-only — no GPU, no NFS model store.
#
# WHY edge-tts (REPLACED Piper / openedai-speech on 2026-06-17): the local Piper
# Bulgarian voice (bg_BG-dimitar-medium, espeak-ng phonemes) was garbled and
# unintelligible — espeak mangles Bulgarian consonants (a synth->Whisper
# round-trip turned "Добър ден" into "Обърден"; a user heard pure gibberish).
# ADR-0003 always named Microsoft edge-tts as the online Bulgarian-quality
# fallback; the operator chose it for BOTH languages (validated 2026-06-17: edge
# bg round-trips through Whisper verbatim — "Добър ден! Как сте днес? ..."). The
# assistant already depends on the internet for the Claude brain, so an online
# TTS adds no new failure mode. English moved to edge too (one engine, higher
# quality) — the previous local Piper English worked but is no longer needed.
#
# NO GPU, NO NFS, NO SECRETS: edge-tts fetches voices from Microsoft on demand
# (nothing to persist), so the NFS model PVC + download init-container + voice
# ConfigMap of the old Piper design are all gone. The container needs EGRESS to
# speech.platform.bing.com (verified reachable from this namespace). The Service
# is ClusterIP-only and the Gateway is the sole externally-exposed component
# (ADR-0001) holding the edge auth, so REQUIRE_API_KEY=False here (the Gateway's
# TTSClient sends no Authorization to TTS).
#
# API SHAPE (unchanged Gateway contract): OpenAI /v1/audio/speech
# POST /v1/audio/speech
# { "model":"tts-1", "input":"<text>", "voice":"<edge voice name>",
# "response_format":"wav" } -> 200, body = raw PCM16 wav bytes
# The Gateway maps detected lang bg/en -> TTS_VOICE_BG / TTS_VOICE_EN (the edge
# voice names, set on the gateway Deployment), and openai-edge-tts accepts edge
# voice names directly. The `-ffmpeg` image variant is REQUIRED for wav output
# (the base image only emits mp3; ffmpeg transcodes to PCM16 wav).
# =============================================================================
variable "edge_tts_image" {
  description = "openai-edge-tts image (ffmpeg variant — needed for wav output)."
  type        = string

  # Image reference for the OpenAI-compatible edge-tts proxy. The `-ffmpeg`
  # flavour ships ffmpeg, which is what makes response_format=wav (PCM16)
  # possible. The tag floats on purpose (upstream has no semver discipline):
  # the namespace is Keel-enrolled, so new digests roll in automatically and
  # Terraform is responsible for the tag string only.
  default = "travisvn/openai-edge-tts:latest-ffmpeg"
}
# Bulgarian voice name. Keep in sync with TTS_VOICE_BG on the Gateway
# Deployment — the Gateway sends this name per request.
variable "bg_voice" {
  description = "Microsoft edge-tts neural Bulgarian voice (the Gateway's TTS_VOICE_BG must match)."
  type        = string
  default     = "bg-BG-KalinaNeural"
}
# English voice name. Keep in sync with TTS_VOICE_EN on the Gateway
# Deployment; also used below as the container's DEFAULT_VOICE fallback.
variable "en_voice" {
  description = "Microsoft edge-tts neural English voice (the Gateway's TTS_VOICE_EN must match)."
  type        = string
  default     = "en-US-AvaNeural"
}
locals {
  # Namespace that holds every object of this stack.
  namespace = "portal-tts"

  # Common object labels (the `app` label is what the Service selects on).
  labels = {
    app = "portal-tts"
  }
}
# Dedicated namespace for the TTS proxy: no Istio sidecar injection, enrolled
# with Keel so floating-tag digest bumps are rolled out automatically.
resource "kubernetes_namespace" "portal_tts" {
  metadata {
    name = local.namespace

    labels = {
      "keel.sh/enrolled" = "true"
      "istio-injection"  = "disabled"
      tier               = local.tiers.aux # CPU-only best-effort helper, not a GPU tenant
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
    ignore_changes = [
      metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"],
    ]
  }
}
# openai-edge-tts, always on: a single replica that is never scaled to zero
# (there is no GPU to release and the idle cost of a thin proxy to Microsoft
# edge-tts is negligible). Strictly CPU-only — NO node_selector, NO toleration,
# NO nvidia.com/gpu. The pod is stateless: voices are fetched from Microsoft
# per request, so there is no init container and there are no volumes.
resource "kubernetes_deployment" "portal_tts" {
  metadata {
    name      = "portal-tts"
    namespace = kubernetes_namespace.portal_tts.metadata[0].name
    labels    = merge(local.labels, { tier = local.tiers.aux })
  }

  spec {
    replicas = 1

    # NOTE(review): Recreate means a short outage on every rollout (including
    # Keel digest bumps) — confirm this is intended for a stateless pod.
    strategy {
      type = "Recreate"
    }

    selector {
      match_labels = {
        app = "portal-tts"
      }
    }

    template {
      metadata {
        labels = {
          app = "portal-tts"
        }
      }

      spec {
        container {
          name  = "portal-tts"
          image = var.edge_tts_image

          # The container listens on :5050 (openai-edge-tts default). The
          # Service translates 8000 -> 5050, which keeps the Gateway's TTS_URL
          # (:8000) untouched.
          port {
            name           = "http"
            container_port = 5050
          }

          # API key enforcement is off: the Service is ClusterIP-only, the
          # Gateway owns edge auth and sends no Authorization header to TTS.
          env {
            name  = "REQUIRE_API_KEY"
            value = "False"
          }

          # Fallback voice only — each request names its own voice and
          # response_format explicitly.
          env {
            name  = "DEFAULT_VOICE"
            value = var.en_voice
          }

          # All three probes are plain TCP checks against :5050, which is only
          # bound once the app is ready. There is no model download, so startup
          # is quick; egress to Microsoft happens per request, not at boot.
          startup_probe {
            period_seconds    = 5
            failure_threshold = 24 # ~2 min

            tcp_socket {
              port = 5050
            }
          }

          readiness_probe {
            period_seconds    = 15
            failure_threshold = 4

            tcp_socket {
              port = 5050
            }
          }

          liveness_probe {
            initial_delay_seconds = 20
            period_seconds        = 30
            failure_threshold     = 5

            tcp_socket {
              port = 5050
            }
          }

          # Workload is a thin HTTP proxy to Microsoft edge-tts plus an ffmpeg
          # transcode, so CPU demand is light. No CPU limit is set (cluster
          # CFS-throttling policy). VERIFY with krr after real traffic and
          # tighten.
          resources {
            requests = {
              cpu    = "50m"
              memory = "256Mi"
            }

            limits = {
              memory = "512Mi"
            }
          }
        }
      }
    }
  }

  lifecycle {
    ignore_changes = [
      spec[0].template[0].spec[0].dns_config, # KYVERNO_LIFECYCLE_V1
      # Keel manages this Deployment (floating tag): its annotation churn is
      # ignored, while the image tag string itself is still applied from TF.
      metadata[0].annotations["keel.sh/policy"],
      metadata[0].annotations["keel.sh/trigger"],
      metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2
      metadata[0].annotations["keel.sh/match-tag"],
      metadata[0].annotations["kubernetes.io/change-cause"],
      metadata[0].annotations["deployment.kubernetes.io/revision"],
      spec[0].template[0].metadata[0].annotations["keel.sh/update-time"], # KEEL_LIFECYCLE_V1
    ]
  }
}
# ClusterIP — in-cluster only (the Gateway calls this; audio stays on the LAN
# until the Gateway speaks it to the Portal). No ingress, no Authentik: the
# Gateway is the only externally exposed component (ADR-0001). OpenAI speech path:
# http://portal-tts.portal-tts.svc.cluster.local:8000/v1/audio/speech
resource "kubernetes_service" "portal_tts" {
  metadata {
    name      = "portal-tts"
    namespace = kubernetes_namespace.portal_tts.metadata[0].name
    labels    = local.labels

    annotations = {
      # openai-edge-tts has no /metrics; annotation-based scrape kept on a live
      # path so the Service stays in the scrape set (Ready-endpoint relabeling
      # filters non-Ready pods). /v1/models is the OpenAI model list.
      #
      # The port annotation must be the CONTAINER port (5050), not the Service
      # port (8000): endpoint-based discovery scrapes the pod IP directly and
      # rewrites the target to <pod IP>:<prometheus.io/port>, and nothing in
      # the pod listens on 8000 (the 8000 -> 5050 mapping exists only on the
      # Service's ClusterIP).
      # NOTE(review): assumes the conventional endpoints-role relabel rules —
      # confirm against the cluster's scrape config.
      # NOTE(review): /v1/models presumably returns JSON, not the Prometheus
      # exposition format, so the scrape may connect yet still report a parse
      # failure — confirm that is acceptable.
      "prometheus.io/scrape" = "true"
      "prometheus.io/path"   = "/v1/models"
      "prometheus.io/port"   = "5050"
    }
  }

  spec {
    type = "ClusterIP"

    selector = {
      app = "portal-tts"
    }

    # Gateway-facing port stays 8000; traffic is forwarded to the container's
    # listener on 5050.
    port {
      name        = "http"
      port        = 8000
      target_port = 5050
    }
  }
}