infra/stacks/portal-assistant/main.tf

# =============================================================================
# portal-assistant gateway — voice orchestrator (STT -> Brain -> TTS)
# =============================================================================
# The single service the Client app talks to: POST /v1/talk takes a WAV + a
# client id, runs Speaches STT -> the claude-agent-service conversational Brain
# -> portal-tts TTS (edge-tts neural voices; originally Piper), and returns the
# spoken reply. In-memory sessions (no SESSION_DB_DSN). See portal-assistant
# ADR-0001/0002/0003. v1 was ClusterIP only (E2E tested in-cluster); the public
# Cloudflare ingress + DEVICE_TOKEN bearer edge is now defined at the bottom of
# this file.
# =============================================================================

# Vault KV v2 entry secret/viktor; read below for ghcr_pull_token (image pull).
data "vault_kv_secret_v2" "viktor" {
  name  = "viktor"
  mount = "secret"
}

# Vault KV v2 entry secret/claude-agent-service; read below for
# api_bearer_token, which becomes the gateway's BRAIN_TOKEN.
data "vault_kv_secret_v2" "cas" {
  name  = "claude-agent-service"
  mount = "secret"
}

# Vault KV v2 entry secret/portal-assistant; read below for device_token,
# which becomes the gateway's DEVICE_TOKEN.
data "vault_kv_secret_v2" "pa" {
  name  = "portal-assistant"
  mount = "secret"
}

locals {
  # Namespace that every resource in this stack is created in.
  namespace = "portal-assistant"

  # Gateway container image. Floating :latest tag; the Deployment pulls it
  # with image_pull_policy = "Always".
  image = "ghcr.io/viktorbarzin/portal-assistant-gateway:latest"

  # Labels identifying the gateway workload.
  labels = {
    app = "portal-assistant-gateway"
  }
}

resource "kubernetes_namespace" "portal_assistant" {
  metadata {
    name = local.namespace

    labels = {
      "keel.sh/enrolled" = "true"
      "istio-injection"  = "disabled"
      # local.tiers is not declared in the visible part of this file; it is
      # presumably provided by another file in the stack (confirm).
      "tier" = local.tiers.edge
    }
  }

  lifecycle {
    # Terraform does not manage this one label, so out-of-band changes to it
    # (presumably made by Goldilocks; confirm) never show up as drift.
    ignore_changes = [
      metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"],
    ]
  }
}

# Image pull secret for ghcr.io. The gateway image is a PRIVATE ghcr package,
# so the pods need registry credentials: the read-only ghcr_pull_token from
# secret/viktor, the same cred the cluster-wide allowlist uses.
resource "kubernetes_secret" "ghcr" {
  type = "kubernetes.io/dockerconfigjson"

  metadata {
    namespace = kubernetes_namespace.portal_assistant.metadata[0].name
    name      = "ghcr-pull"
  }

  data = {
    # Docker config JSON: username/password for the registry plus the
    # base64("username:password") "auth" field.
    ".dockerconfigjson" = jsonencode({
      auths = {
        "ghcr.io" = {
          auth     = base64encode(format("%s:%s", "viktorbarzin", data.vault_kv_secret_v2.viktor.data["ghcr_pull_token"]))
          password = data.vault_kv_secret_v2.viktor.data["ghcr_pull_token"]
          username = "viktorbarzin"
        }
      }
    })
  }
}

# Secret backing the gateway's token environment variables:
#   BRAIN_TOKEN  - claude-agent-service's bearer, used to call the
#                  conversational endpoint.
#   DEVICE_TOKEN - the per-Client secret the Portal app carries to
#                  authenticate to /v1/talk.
resource "kubernetes_secret" "gateway" {
  metadata {
    namespace = kubernetes_namespace.portal_assistant.metadata[0].name
    name      = "portal-assistant-gateway-secrets"
  }

  data = {
    DEVICE_TOKEN = data.vault_kv_secret_v2.pa.data["device_token"]
    BRAIN_TOKEN  = data.vault_kv_secret_v2.cas.data["api_bearer_token"]
  }
}

# Gateway Deployment: one container listening on :8000 that orchestrates
# STT -> Brain -> TTS for POST /v1/talk.
resource "kubernetes_deployment" "gateway" {
  metadata {
    name      = "portal-assistant-gateway"
    namespace = kubernetes_namespace.portal_assistant.metadata[0].name
    labels    = merge(local.labels, { tier = local.tiers.edge })
  }
  spec {
    # Sessions are held in memory (no SESSION_DB_DSN is set), so this
    # presumably has to stay at 1 replica; confirm before scaling out.
    replicas = 1
    selector {
      # Selector and pod-template labels both come from local.labels so they
      # cannot drift apart. A Deployment's selector is immutable, so changing
      # local.labels means recreating the Deployment.
      match_labels = local.labels
    }
    template {
      metadata {
        labels = local.labels
      }
      spec {
        # The image is a private ghcr package; pull with the ghcr secret.
        image_pull_secrets {
          name = kubernetes_secret.ghcr.metadata[0].name
        }
        container {
          name              = "gateway"
          image             = local.image
          image_pull_policy = "Always"
          port {
            container_port = 8000
            name           = "http"
          }
          # STT -> Speaches; TTS -> portal-tts (edge-tts); Brain -> claude-agent-service.
          env {
            name  = "STT_URL"
            value = "http://portal-stt.portal-stt.svc.cluster.local:8000"
          }
          env {
            name  = "STT_MODEL"
            value = "deepdml/faster-whisper-large-v3-turbo-ct2"
          }
          env {
            name  = "TTS_URL"
            value = "http://portal-tts.portal-tts.svc.cluster.local:8000"
          }
          # portal-tts now serves Microsoft edge-tts neural voices (Piper's
          # Bulgarian was garbled; 2026-06-17). The gateway maps detected lang
          # bg/en -> these edge voice names, which openai-edge-tts accepts directly.
          env {
            name  = "TTS_VOICE_BG"
            value = "bg-BG-KalinaNeural"
          }
          env {
            name  = "TTS_VOICE_EN"
            value = "en-US-AvaNeural"
          }
          env {
            name  = "BRAIN_URL"
            value = "http://claude-agent-service.claude-agent.svc.cluster.local:8080"
          }
          # Tokens are injected from the gateway Secret rather than inlined,
          # so their values never appear in the pod spec.
          env {
            name = "BRAIN_TOKEN"
            value_from {
              secret_key_ref {
                name = kubernetes_secret.gateway.metadata[0].name
                key  = "BRAIN_TOKEN"
              }
            }
          }
          env {
            name = "DEVICE_TOKEN"
            value_from {
              secret_key_ref {
                name = kubernetes_secret.gateway.metadata[0].name
                key  = "DEVICE_TOKEN"
              }
            }
          }
          # Both probes hit GET /health on the container port.
          readiness_probe {
            http_get {
              path = "/health"
              port = 8000
            }
            period_seconds = 10
          }
          liveness_probe {
            http_get {
              path = "/health"
              port = 8000
            }
            initial_delay_seconds = 15
            period_seconds        = 30
          }
          # Memory is capped; no CPU limit is set.
          resources {
            requests = {
              cpu    = "50m"
              memory = "256Mi"
            }
            limits = {
              memory = "512Mi"
            }
          }
        }
      }
    }
  }
  lifecycle {
    # dns_config is left unmanaged; per the marker it is presumably mutated by
    # a Kyverno policy after apply (confirm).
    ignore_changes = [
      spec[0].template[0].spec[0].dns_config, # KYVERNO_LIFECYCLE_V1
    ]
  }
}

# ClusterIP Service fronting the gateway pods. The gateway is the only
# externally-exposed component (ADR-0001): in-cluster callers (e.g. the E2E
# smoke) reach it through this Service, and the public Cloudflare ingress
# defined below routes to it. /metrics scraped by Prometheus.
resource "kubernetes_service" "gateway" {
  metadata {
    name      = "portal-assistant-gateway"
    namespace = kubernetes_namespace.portal_assistant.metadata[0].name
    labels    = local.labels
    annotations = {
      "prometheus.io/scrape" = "true"
      "prometheus.io/path"   = "/metrics"
      "prometheus.io/port"   = "8000"
    }
  }
  spec {
    type = "ClusterIP"
    # Select pods by the shared label set instead of repeating the literal,
    # so the selector always matches the labels defined in locals.
    selector = local.labels
    port {
      name        = "http"
      port        = 8000
      target_port = 8000
    }
  }
}

# Public Cloudflare ingress: the Portal app reaches the gateway at
# https://portal-assistant.viktorbarzin.me/v1/talk. The tls-secret is
# Kyverno-synced into the namespace. Edge auth is the gateway's own
# DEVICE_TOKEN bearer, so there is no Authentik in front.
module "ingress" {
  source = "../../modules/kubernetes/ingress_factory"

  name      = "portal-assistant"
  namespace = kubernetes_namespace.portal_assistant.metadata[0].name
  dns_type  = "proxied"

  # Backend: the gateway Service on port 8000.
  service_name = kubernetes_service.gateway.metadata[0].name
  port         = 8000

  tls_secret_name = "tls-secret"

  # auth = "app": the gateway enforces its own DEVICE_TOKEN bearer on
  # /v1/talk; Authentik would break the native Portal client (it has no
  # browser login).
  auth = "app"

  # Sized for audio (WAV) uploads.
  max_body_size = "25m"
}
portal-assistant: land voice stacks + switch TTS to edge-tts (intelligible Bulgarian) The portal-assistant voice-assistant stacks (portal-tts, portal-stt, portal-assistant) were applied to the live cluster from feature branches but never landed on master — the GitOps source of truth. This lands all three and, in portal-tts, fixes Bulgarian speech. Bulgarian was unintelligible: the local Piper voice (bg_BG-dimitar-medium via espeak-ng) mangles Bulgarian consonants — a synth->Whisper round-trip turned "Добър ден" into "Обърден", and a user heard pure gibberish. English was fine. portal-tts now runs openai-edge-tts (Microsoft edge-tts neural voices) for BOTH languages instead of Piper — ADR-0003 always named edge-tts as the online Bulgarian-quality fallback. Validated before landing: edge bg round-trips through Whisper verbatim ("Добър ден! Как сте днес? ..."). The gateway maps detected language bg/en to the edge voice names via new TTS_VOICE_BG / TTS_VOICE_EN env (bg-BG-KalinaNeural / en-US-AvaNeural). No GPU, no NFS model store, no secrets — edge fetches voices from Microsoft per request (egress verified). The assistant already needs the internet for the Claude brain, so an online TTS adds no new failure mode. The brain stays Sonnet with no extended thinking (already the default — a live turn answers directly in ~3.4s), per the latency-over-smartness ask. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> 2026-06-17 20:25:29 +00:00			`# =============================================================================`
			`# portal-assistant gateway — voice orchestrator (STT -> Brain -> TTS)`
			`# =============================================================================`
			`# The single service the Client app talks to: POST /v1/talk takes a WAV + a`
			`# client id, runs Speaches STT -> the claude-agent-service conversational Brain`
			`# -> Piper TTS, and returns the spoken reply. v1: ClusterIP only (E2E tested`
			`# in-cluster). In-memory sessions (no SESSION_DB_DSN). See portal-assistant`
			`# ADR-0001/0002/0003. Public Cloudflare ingress + device-token edge is the next`
			`# increment.`
			`# =============================================================================`

			`data "vault_kv_secret_v2" "viktor" {`
			`mount = "secret"`
			`name = "viktor"`
			`}`

			`data "vault_kv_secret_v2" "cas" {`
			`mount = "secret"`
			`name = "claude-agent-service"`
			`}`

			`data "vault_kv_secret_v2" "pa" {`
			`mount = "secret"`
			`name = "portal-assistant"`
			`}`

			`locals {`
			`namespace = "portal-assistant"`
			`labels = { app = "portal-assistant-gateway" }`
			`image = "ghcr.io/viktorbarzin/portal-assistant-gateway:latest"`
			`}`

			`resource "kubernetes_namespace" "portal_assistant" {`
			`metadata {`
			`name = local.namespace`
			`labels = {`
			`tier = local.tiers.edge`
			`"istio-injection" = "disabled"`
			`"keel.sh/enrolled" = "true"`
			`}`
			`}`
			`lifecycle {`
			`ignore_changes = [metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"]]`
			`}`
			`}`

			`# Pull secret — the gateway image is a PRIVATE ghcr package. Uses the read-only`
			`# ghcr_pull_token (secret/viktor), the same cred the cluster-wide allowlist uses.`
			`resource "kubernetes_secret" "ghcr" {`
			`metadata {`
			`name = "ghcr-pull"`
			`namespace = kubernetes_namespace.portal_assistant.metadata[0].name`
			`}`
			`type = "kubernetes.io/dockerconfigjson"`
			`data = {`
			`".dockerconfigjson" = jsonencode({`
			`auths = {`
			`"ghcr.io" = {`
			`username = "viktorbarzin"`
			`password = data.vault_kv_secret_v2.viktor.data["ghcr_pull_token"]`
			`auth = base64encode("viktorbarzin:${data.vault_kv_secret_v2.viktor.data["ghcr_pull_token"]}")`
			`}`
			`}`
			`})`
			`}`
			`}`

			`# Tokens the gateway needs: BRAIN_TOKEN = claude-agent-service's bearer (to call`
			`# the conversational endpoint); DEVICE_TOKEN = the per-Client secret the Portal`
			`# app carries to authenticate to /v1/talk.`
			`resource "kubernetes_secret" "gateway" {`
			`metadata {`
			`name = "portal-assistant-gateway-secrets"`
			`namespace = kubernetes_namespace.portal_assistant.metadata[0].name`
			`}`
			`data = {`
			`BRAIN_TOKEN = data.vault_kv_secret_v2.cas.data["api_bearer_token"]`
			`DEVICE_TOKEN = data.vault_kv_secret_v2.pa.data["device_token"]`
			`}`
			`}`

			`resource "kubernetes_deployment" "gateway" {`
			`metadata {`
			`name = "portal-assistant-gateway"`
			`namespace = kubernetes_namespace.portal_assistant.metadata[0].name`
			`labels = merge(local.labels, { tier = local.tiers.edge })`
			`}`
			`spec {`
			`replicas = 1`
			`selector {`
			`match_labels = { app = "portal-assistant-gateway" }`
			`}`
			`template {`
			`metadata {`
			`labels = { app = "portal-assistant-gateway" }`
			`}`
			`spec {`
			`image_pull_secrets {`
			`name = kubernetes_secret.ghcr.metadata[0].name`
			`}`
			`container {`
			`name = "gateway"`
			`image = local.image`
			`image_pull_policy = "Always"`
			`port {`
			`container_port = 8000`
			`name = "http"`
			`}`
			`# STT -> Speaches; TTS -> Piper; Brain -> claude-agent-service.`
			`env {`
			`name = "STT_URL"`
			`value = "http://portal-stt.portal-stt.svc.cluster.local:8000"`
			`}`
			`env {`
			`name = "STT_MODEL"`
			`value = "deepdml/faster-whisper-large-v3-turbo-ct2"`
			`}`
			`env {`
			`name = "TTS_URL"`
			`value = "http://portal-tts.portal-tts.svc.cluster.local:8000"`
			`}`
			`# portal-tts now serves Microsoft edge-tts neural voices (Piper's`
			`# Bulgarian was garbled; 2026-06-17). The gateway maps detected lang`
			`# bg/en -> these edge voice names, which openai-edge-tts accepts directly.`
			`env {`
			`name = "TTS_VOICE_BG"`
			`value = "bg-BG-KalinaNeural"`
			`}`
			`env {`
			`name = "TTS_VOICE_EN"`
			`value = "en-US-AvaNeural"`
			`}`
			`env {`
			`name = "BRAIN_URL"`
			`value = "http://claude-agent-service.claude-agent.svc.cluster.local:8080"`
			`}`
			`env {`
			`name = "BRAIN_TOKEN"`
			`value_from {`
			`secret_key_ref {`
			`name = kubernetes_secret.gateway.metadata[0].name`
			`key = "BRAIN_TOKEN"`
			`}`
			`}`
			`}`
			`env {`
			`name = "DEVICE_TOKEN"`
			`value_from {`
			`secret_key_ref {`
			`name = kubernetes_secret.gateway.metadata[0].name`
			`key = "DEVICE_TOKEN"`
			`}`
			`}`
			`}`
			`readiness_probe {`
			`http_get {`
			`path = "/health"`
			`port = 8000`
			`}`
			`period_seconds = 10`
			`}`
			`liveness_probe {`
			`http_get {`
			`path = "/health"`
			`port = 8000`
			`}`
			`initial_delay_seconds = 15`
			`period_seconds = 30`
			`}`
			`resources {`
			`requests = {`
			`cpu = "50m"`
			`memory = "256Mi"`
			`}`
			`limits = {`
			`memory = "512Mi"`
			`}`
			`}`
			`}`
			`}`
			`}`
			`}`
			`lifecycle {`
			`ignore_changes = [`
			`spec[0].template[0].spec[0].dns_config, # KYVERNO_LIFECYCLE_V1`
			`]`
			`}`
			`}`

			`# ClusterIP — the only externally-exposed component (ADR-0001) gets its public`
			`# Cloudflare ingress in the next increment; here it's reachable in-cluster for`
			`# the E2E smoke. /metrics scraped by Prometheus.`
			`resource "kubernetes_service" "gateway" {`
			`metadata {`
			`name = "portal-assistant-gateway"`
			`namespace = kubernetes_namespace.portal_assistant.metadata[0].name`
			`labels = local.labels`
			`annotations = {`
			`"prometheus.io/scrape" = "true"`
			`"prometheus.io/path" = "/metrics"`
			`"prometheus.io/port" = "8000"`
			`}`
			`}`
			`spec {`
			`type = "ClusterIP"`
			`selector = { app = "portal-assistant-gateway" }`
			`port {`
			`name = "http"`
			`port = 8000`
			`target_port = 8000`
			`}`
			`}`
			`}`

			`# Public Cloudflare ingress — the Portal app reaches the gateway at`
			`# https://portal-assistant.viktorbarzin.me/v1/talk. tls-secret is Kyverno-synced`
			`# into the namespace. The gateway holds its own edge auth (the DEVICE_TOKEN`
			`# bearer), so no Authentik in front.`
			`module "ingress" {`
			`source = "../../modules/kubernetes/ingress_factory"`
			`name = "portal-assistant"`
			`namespace = kubernetes_namespace.portal_assistant.metadata[0].name`
			`service_name = kubernetes_service.gateway.metadata[0].name`
			`port = 8000`
			`tls_secret_name = "tls-secret"`
			`# auth = "app": the gateway enforces its own DEVICE_TOKEN bearer on /v1/talk; Authentik would break the native Portal client (it has no browser login).`
			`auth = "app"`
			`dns_type = "proxied"`
			`max_body_size = "25m" # audio (WAV) uploads`
			`}`