231 lines
7.2 KiB
Terraform
231 lines
7.2 KiB
Terraform
|
|
# =============================================================================
|
||
|
|
# portal-assistant gateway — voice orchestrator (STT -> Brain -> TTS)
|
||
|
|
# =============================================================================
|
||
|
|
# The single service the Client app talks to: POST /v1/talk takes a WAV + a
|
||
|
|
# client id, runs Speaches STT -> the claude-agent-service conversational Brain
# -> portal-tts (edge-tts neural voices, formerly Piper), and returns the spoken
# reply. In-memory sessions (no SESSION_DB_DSN). See portal-assistant
# ADR-0001/0002/0003. Reachable in-cluster through the ClusterIP Service and
# publicly through the Cloudflare ingress defined further down in this file; the
# gateway enforces its own DEVICE_TOKEN bearer on /v1/talk.
|
||
|
|
# =============================================================================
|
||
|
|
|
||
|
|
# Vault KV v2 entry "viktor" on the "secret" mount. This file reads its
# `ghcr_pull_token` key to build the ghcr.io image pull secret.
data "vault_kv_secret_v2" "viktor" {
  name  = "viktor"
  mount = "secret"
}
|
||
|
|
|
||
|
|
# Vault KV v2 entry "claude-agent-service" on the "secret" mount. This file
# reads its `api_bearer_token` key and hands it to the gateway as BRAIN_TOKEN.
data "vault_kv_secret_v2" "cas" {
  name  = "claude-agent-service"
  mount = "secret"
}
|
||
|
|
|
||
|
|
# Vault KV v2 entry "portal-assistant" on the "secret" mount. This file reads
# its `device_token` key and hands it to the gateway as DEVICE_TOKEN.
data "vault_kv_secret_v2" "pa" {
  name  = "portal-assistant"
  mount = "secret"
}
|
||
|
|
|
||
|
|
# Values shared by the resources in this file.
locals {
  # Namespace that holds the gateway workload and its secrets.
  namespace = "portal-assistant"

  # Base label set applied to the gateway Deployment and Service metadata.
  labels = {
    app = "portal-assistant-gateway"
  }

  # Gateway container image (floating `latest` tag on ghcr.io).
  image = "ghcr.io/viktorbarzin/portal-assistant-gateway:latest"
}
|
||
|
|
|
||
|
|
# Namespace for the portal-assistant gateway. Every other resource in this file
# derives its namespace from this resource's name.
resource "kubernetes_namespace" "portal_assistant" {
  metadata {
    name = local.namespace

    labels = {
      "tier"             = local.tiers.edge
      "istio-injection"  = "disabled"
      "keel.sh/enrolled" = "true"
    }
  }

  lifecycle {
    # Changes to this single label are not treated as drift.
    # NOTE(review): presumably it is written out-of-band by Goldilocks — confirm.
    ignore_changes = [
      metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"],
    ]
  }
}
|
||
|
|
|
||
|
|
# Image pull secret for ghcr.io. The gateway image is a PRIVATE ghcr package, so
# the pod needs registry credentials. The password is the read-only
# ghcr_pull_token from secret/viktor — the same credential the cluster-wide
# allowlist uses.
resource "kubernetes_secret" "ghcr" {
  type = "kubernetes.io/dockerconfigjson"

  metadata {
    namespace = kubernetes_namespace.portal_assistant.metadata[0].name
    name      = "ghcr-pull"
  }

  data = {
    # Docker config JSON with a single ghcr.io entry. `auth` is the base64 of
    # "<username>:<token>".
    ".dockerconfigjson" = jsonencode({
      auths = {
        "ghcr.io" = {
          username = "viktorbarzin"
          password = data.vault_kv_secret_v2.viktor.data["ghcr_pull_token"]
          auth = base64encode(join(":", [
            "viktorbarzin",
            data.vault_kv_secret_v2.viktor.data["ghcr_pull_token"],
          ]))
        }
      }
    })
  }
}
|
||
|
|
|
||
|
|
# Runtime credentials consumed by the gateway container:
#   BRAIN_TOKEN  — claude-agent-service's bearer, used to call the
#                  conversational endpoint.
#   DEVICE_TOKEN — the per-Client secret the Portal app carries to authenticate
#                  to /v1/talk.
resource "kubernetes_secret" "gateway" {
  metadata {
    namespace = kubernetes_namespace.portal_assistant.metadata[0].name
    name      = "portal-assistant-gateway-secrets"
  }

  data = {
    "BRAIN_TOKEN"  = data.vault_kv_secret_v2.cas.data["api_bearer_token"]
    "DEVICE_TOKEN" = data.vault_kv_secret_v2.pa.data["device_token"]
  }
}
|
||
|
|
|
||
|
|
# Gateway Deployment: a single replica of the voice orchestrator, serving HTTP
# on container port 8000.
resource "kubernetes_deployment" "gateway" {
  metadata {
    namespace = kubernetes_namespace.portal_assistant.metadata[0].name
    name      = "portal-assistant-gateway"
    labels    = merge(local.labels, { tier = local.tiers.edge })
  }

  spec {
    replicas = 1

    selector {
      match_labels = {
        app = "portal-assistant-gateway"
      }
    }

    template {
      metadata {
        labels = {
          app = "portal-assistant-gateway"
        }
      }

      spec {
        # Credentials for pulling the image from ghcr.io.
        image_pull_secrets {
          name = kubernetes_secret.ghcr.metadata[0].name
        }

        container {
          name              = "gateway"
          image             = local.image
          image_pull_policy = "Always"

          port {
            name           = "http"
            container_port = 8000
          }

          # Plain-text settings, emitted in list order.
          # STT -> Speaches; TTS -> portal-tts; Brain -> claude-agent-service.
          dynamic "env" {
            for_each = [
              { name = "STT_URL", value = "http://portal-stt.portal-stt.svc.cluster.local:8000" },
              { name = "STT_MODEL", value = "deepdml/faster-whisper-large-v3-turbo-ct2" },
              { name = "TTS_URL", value = "http://portal-tts.portal-tts.svc.cluster.local:8000" },
              # portal-tts now serves Microsoft edge-tts neural voices (Piper's
              # Bulgarian was garbled; 2026-06-17). The gateway maps detected
              # lang bg/en -> these edge voice names, which openai-edge-tts
              # accepts directly.
              { name = "TTS_VOICE_BG", value = "bg-BG-KalinaNeural" },
              { name = "TTS_VOICE_EN", value = "en-US-AvaNeural" },
              { name = "BRAIN_URL", value = "http://claude-agent-service.claude-agent.svc.cluster.local:8080" },
            ]

            content {
              name  = env.value.name
              value = env.value.value
            }
          }

          # Secret-backed settings: each variable is read from the key of the
          # same name in the gateway Secret.
          dynamic "env" {
            for_each = ["BRAIN_TOKEN", "DEVICE_TOKEN"]

            content {
              name = env.value

              value_from {
                secret_key_ref {
                  name = kubernetes_secret.gateway.metadata[0].name
                  key  = env.value
                }
              }
            }
          }

          # Both probes hit the same /health endpoint on the HTTP port.
          readiness_probe {
            period_seconds = 10

            http_get {
              port = 8000
              path = "/health"
            }
          }

          liveness_probe {
            initial_delay_seconds = 15
            period_seconds        = 30

            http_get {
              port = 8000
              path = "/health"
            }
          }

          # Memory is capped; no CPU limit is set.
          resources {
            requests = {
              cpu    = "50m"
              memory = "256Mi"
            }

            limits = {
              memory = "512Mi"
            }
          }
        }
      }
    }
  }

  lifecycle {
    ignore_changes = [
      spec[0].template[0].spec[0].dns_config, # KYVERNO_LIFECYCLE_V1
    ]
  }
}
|
||
|
|
|
||
|
|
# ClusterIP Service in front of the gateway — the only externally-exposed
# component (ADR-0001). Public traffic arrives through the Cloudflare ingress
# defined further down in this file; in-cluster callers (e.g. the E2E smoke)
# reach the gateway through this Service directly. /metrics scraped by
# Prometheus.
resource "kubernetes_service" "gateway" {
  metadata {
    namespace = kubernetes_namespace.portal_assistant.metadata[0].name
    name      = "portal-assistant-gateway"
    labels    = local.labels

    annotations = {
      "prometheus.io/scrape" = "true"
      "prometheus.io/port"   = "8000"
      "prometheus.io/path"   = "/metrics"
    }
  }

  spec {
    type = "ClusterIP"

    selector = {
      app = "portal-assistant-gateway"
    }

    port {
      name        = "http"
      port        = 8000
      target_port = 8000
    }
  }
}
|
||
|
|
|
||
|
|
# Public Cloudflare ingress — the Portal app reaches the gateway at
# https://portal-assistant.viktorbarzin.me/v1/talk. tls-secret is Kyverno-synced
# into the namespace. The gateway holds its own edge auth (the DEVICE_TOKEN
# bearer), so no Authentik in front.
module "ingress" {
  source = "../../modules/kubernetes/ingress_factory"

  # Backend the ingress routes to.
  namespace    = kubernetes_namespace.portal_assistant.metadata[0].name
  name         = "portal-assistant"
  service_name = kubernetes_service.gateway.metadata[0].name
  port         = 8000

  tls_secret_name = "tls-secret"
  dns_type        = "proxied"

  # auth = "app": the gateway enforces its own DEVICE_TOKEN bearer on /v1/talk;
  # Authentik would break the native Portal client (it has no browser login).
  auth = "app"

  # Sized for audio (WAV) uploads.
  max_body_size = "25m"
}
|