[ci skip] Deploy Wyoming Whisper STT service for Home Assistant voice input

Add Wyoming Faster Whisper (rhasspy/wyoming-whisper) as a new K8s service
exposed via Traefik TCP entrypoint on port 10300. Accessible from ha-london
RPi via VPN at 10.0.20.202:10300.
This commit is contained in:
Viktor Barzin 2026-02-08 00:09:03 +00:00
parent e067504170
commit d89947c2fd
No known key found for this signature in database
GPG key ID: 0EB088298288D958
4 changed files with 166 additions and 13 deletions

View file

@ -149,6 +149,7 @@ When configuring services to use the mailserver:
- Immich: v2.4.1
- Freedify: latest (music streaming, factory pattern)
- AFFiNE: stable (visual canvas, uses PostgreSQL + Redis)
- Wyoming Whisper: latest (STT for Home Assistant, CPU on GPU node)
## Useful Commands
```bash
@ -274,6 +275,7 @@ Top-level modules in `main.tf`:
| frigate | NVR/camera (GPU) | gpu |
| ebook2audiobook | E-book to audio (GPU) | gpu |
| affine | Visual canvas/whiteboard (PostgreSQL + Redis) | aux |
| whisper | Wyoming Faster Whisper STT (CPU on GPU node) | gpu |
---
@ -441,3 +443,14 @@ Skills are specialized workflows for common tasks. Located in `.claude/skills/`.
- `MAILER_*` - SMTP configuration for email invites
- **Local-first**: Data stored in browser by default; syncs to server when user creates account
- **Docs**: https://docs.affine.pro/self-host-affine
### Wyoming Whisper (STT for Home Assistant)
- **Image**: `rhasspy/wyoming-whisper:latest`
- **Port**: 10300/TCP (Wyoming protocol)
- **Model**: `small-int8` (CPU-optimized, no CUDA variant available from upstream)
- **Runs on**: GPU node (node_selector gpu=true + nvidia toleration) but uses CPU only
- **Storage**: NFS at `/mnt/main/whisper` → `/data` (model cache)
- **Exposure**: Internal only via Traefik TCP entrypoint `whisper-tcp` → IngressRouteTCP
- **Access**: `10.0.20.202:10300` (Traefik LB IP, no public DNS)
- **HA Integration**: Wyoming Protocol integration in ha-london, host `10.0.20.202`, port `10300`
- **No GPU acceleration**: Official image is CPU-only (Debian + PyTorch CPU). The `mib1185/wyoming-faster-whisper-cuda` image exists but requires self-build.

View file

@ -83,6 +83,7 @@ variable "public_ip" {}
variable "cloudflare_proxied_names" {}
variable "cloudflare_non_proxied_names" {}
variable "owntracks_credentials" {}
variable "ollama_api_credentials" {}
variable "dawarich_database_password" {}
variable "geoapify_api_key" {}
variable "tandoor_database_password" {}
@ -134,8 +135,8 @@ variable "defcon_level" {
locals {
defcon_modules = {
1 : ["wireguard", "technitium", "headscale", "traefik", "xray", "authentik", "cloudflare", "authelia", "monitoring"], # Critical connectivity services
2 : ["vaultwarden", "redis", "immich", "nvidia", "metrics-server", "uptime-kuma", "crowdsec", "kyverno"], # Storage and other db services
3 : ["reverse-proxy"], # Cluster admin services (k8s-dashboard chart repo still 404)
2 : ["vaultwarden", "redis", "immich", "nvidia", "metrics-server", "uptime-kuma", "crowdsec", "kyverno"], # Storage and other db services
3 : ["reverse-proxy"], # Cluster admin services (k8s-dashboard chart repo still 404)
4 : [
"mailserver", "shadowsocks", "webhook_handler", "tuya-bridge", "dawarich", "owntracks", "nextcloud",
"calibre", "onlyoffice", "f1-stream", "rybbit", "isponsorblocktv", "actualbudget"
@ -146,7 +147,7 @@ locals {
"url", "excalidraw", "travel_blog", "dashy", "send", "ytdlp", "wealthfolio", "rybbit", "stirling-pdf",
"networking-toolbox", "navidrome", "freshrss", "forgejo", "tor-proxy", "real-estate-crawler", "n8n",
"changedetection", "linkwarden", "matrix", "homepage", "meshcentral", "diun", "cyberchef", "ntfy", "ollama",
"servarr", "jsoncrack", "paperless-ngx", "frigate", "audiobookshelf", "tandoor", "ebook2audiobook", "netbox", "speedtest", "resume", "freedify", "mcaptcha", "affine", "plotting-book"
"servarr", "jsoncrack", "paperless-ngx", "frigate", "audiobookshelf", "tandoor", "ebook2audiobook", "netbox", "speedtest", "resume", "freedify", "mcaptcha", "affine", "plotting-book", "whisper"
],
}
active_modules = distinct(flatten([
@ -729,10 +730,11 @@ module "servarr" {
# }
module "ollama" { # Disabled as it requires too much resources...
source = "./ollama"
for_each = contains(local.active_modules, "ollama") ? { ollama = true } : {}
tls_secret_name = var.tls_secret_name
tier = local.tiers.gpu
source = "./ollama"
for_each = contains(local.active_modules, "ollama") ? { ollama = true } : {}
tls_secret_name = var.tls_secret_name
tier = local.tiers.gpu
ollama_api_credentials = var.ollama_api_credentials
depends_on = [null_resource.core_services]
}
@ -1086,3 +1088,12 @@ module "plotting-book" {
depends_on = [null_resource.core_services]
}
# Wyoming Faster Whisper STT. Instantiated only when "whisper" appears in
# local.active_modules; otherwise for_each receives an empty map and nothing
# is created.
module "whisper" {
  source   = "./whisper"
  for_each = contains(local.active_modules, "whisper") ? { whisper = true } : {}

  # Inputs declared by ./whisper
  tier            = local.tiers.gpu
  tls_secret_name = var.tls_secret_name

  depends_on = [null_resource.core_services]
}

View file

@ -46,14 +46,14 @@ resource "helm_release" "traefik" {
providers = {
kubernetesIngress = {
enabled = true
allowExternalNameServices = true
publishedService = { enabled = true }
enabled = true
allowExternalNameServices = true
publishedService = { enabled = true }
}
kubernetesCRD = {
enabled = true
allowExternalNameServices = true
allowCrossNamespace = true
enabled = true
allowExternalNameServices = true
allowCrossNamespace = true
}
}
@ -97,6 +97,12 @@ resource "helm_release" "traefik" {
protocol = "UDP"
expose = { default = true }
}
whisper-tcp = {
port = 10300
exposedPort = 10300
protocol = "TCP"
expose = { default = true }
}
}
service = {

View file

@ -0,0 +1,123 @@
# Name of the TLS secret; passed through unchanged to the setup_tls_secret
# module. No type constraint is declared.
variable "tls_secret_name" {}

# Value written to the `tier` label on the whisper Deployment.
variable "tier" {
  type = string
}
# Namespace that the whisper Deployment, Service and TLS secret module are
# placed in (they reference metadata[0].name of this resource).
resource "kubernetes_namespace" "whisper" {
  metadata {
    name = "whisper"
  }
}
# Invokes the shared ../setup_tls_secret module for the whisper namespace.
# NOTE(review): presumably this creates the TLS secret in that namespace;
# confirm against the module's implementation.
module "tls_secret" {
  source = "../setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.whisper.metadata[0].name
}
# Single-replica Deployment running the Wyoming Whisper container.
# The pod is pinned to nodes labelled gpu=true and tolerates the
# nvidia.com/gpu=true:NoSchedule taint; no GPU resource is requested here.
resource "kubernetes_deployment" "whisper" {
  metadata {
    name      = "whisper"
    namespace = kubernetes_namespace.whisper.metadata[0].name

    labels = {
      app  = "whisper"
      tier = var.tier
    }
  }

  spec {
    replicas = 1

    # Recreate: the existing pod is stopped before its replacement starts.
    strategy {
      type = "Recreate"
    }

    selector {
      match_labels = {
        app = "whisper"
      }
    }

    template {
      metadata {
        labels = {
          app = "whisper"
        }
      }

      spec {
        # Scheduling constraints: GPU-labelled node plus matching toleration.
        node_selector = {
          gpu = "true"
        }

        toleration {
          key      = "nvidia.com/gpu"
          operator = "Equal"
          value    = "true"
          effect   = "NoSchedule"
        }

        container {
          name  = "whisper"
          image = "rhasspy/wyoming-whisper:latest"

          # Flags handed to the image's entrypoint.
          args = [
            "--model", "small-int8",
            "--language", "en",
            "--beam-size", "1",
          ]

          # Wyoming protocol listener.
          port {
            container_port = 10300
            protocol       = "TCP"
          }

          volume_mount {
            name       = "data"
            mount_path = "/data"
          }
        }

        # NFS export mounted at /data in the container.
        volume {
          name = "data"

          nfs {
            server = "10.0.10.15"
            path   = "/mnt/main/whisper"
          }
        }
      }
    }
  }
}
# Service in front of the whisper pods (selected by app=whisper), forwarding
# TCP 10300 to the same port on the container. No `type` is set, so the
# provider/cluster default applies.
resource "kubernetes_service" "whisper" {
  metadata {
    name      = "whisper"
    namespace = kubernetes_namespace.whisper.metadata[0].name

    labels = {
      app = "whisper"
    }
  }

  spec {
    selector = {
      app = "whisper"
    }

    port {
      name        = "wyoming"
      protocol    = "TCP"
      port        = 10300
      target_port = 10300
    }
  }
}
# TCP passthrough from Traefik to the whisper service.
#
# The IngressRouteTCP is created in the `traefik` namespace and binds the
# `whisper-tcp` entrypoint to the whisper Service, which lives in a different
# namespace (Traefik's kubernetesCRD provider must allow cross-namespace
# references for this to resolve).
#
# The backend name and namespace are read from kubernetes_service.whisper
# instead of being repeated as string literals. This gives Terraform an
# implicit dependency, so the route is created after the Service and its
# namespace and destroyed before them, and keeps the names from drifting.
resource "kubernetes_manifest" "whisper_tcp_ingressroute" {
  manifest = {
    apiVersion = "traefik.io/v1alpha1"
    kind       = "IngressRouteTCP"

    metadata = {
      name      = "whisper-tcp"
      namespace = "traefik"
    }

    spec = {
      entryPoints = ["whisper-tcp"]

      routes = [{
        # Non-TLS TCP has no SNI to match on, so the catch-all rule is used.
        match = "HostSNI(`*`)"

        services = [{
          name      = kubernetes_service.whisper.metadata[0].name
          namespace = kubernetes_service.whisper.metadata[0].namespace
          port      = 10300
        }]
      }]
    }
  }
}