diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md index 1df15f45..d579a35b 100755 --- a/.claude/CLAUDE.md +++ b/.claude/CLAUDE.md @@ -131,7 +131,7 @@ Repo IDs: infra=1, Website=2, finance=3, health=4, travel_blog=5, webhook-handle | Service | Key Operational Knowledge | |---------|--------------------------| | Nextcloud | MaxRequestWorkers=150, needs 8Gi limit (Apache transient memory spikes, see commit eb94144), very generous startup probe | -| Immich | ML on SSD, disable ModSecurity (breaks streaming), CUDA for ML, frequent upgrades | +| Immich | ML on SSD, CUDA for ML, disable ModSecurity (breaks streaming), frequent upgrades. **Video transcoding is GPU-accelerated**: `immich-server` is pinned to GPU node1 (nodeSelector `nvidia.com/gpu.present=true` + `nvidia.com/gpu` NoSchedule toleration + `gpu-workload` priority) and holds one time-sliced GPU slice (limit `nvidia.com/gpu: 1`) — the stock immich-server image's ffmpeg already ships h264/hevc_nvenc + NVDEC. Hardware transcoding is activated via `ffmpeg.accel=nvenc` + `accelDecode=true` in the **DB** system-config (`system_metadata` table, key `system-config`, JSONB — NOT Terraform; app config is DB-managed here like oauth/smtp). Direct DB edits need a pod **recreate** to reload (config is cached at boot; only API-driven changes broadcast a reload). If Immich is ever reinstalled fresh (not restored), re-set these two keys (`ffmpeg.accel` and `accelDecode`). Thumbnails/previews live on SSD NFS (sdb) — do NOT move them to block storage (the HDD, sdc, is slower and is the contended IO domain). 
| | CrowdSec | Pin version, disable Metabase when not needed (CPU hog), LAPI scaled to 3, **DB on PostgreSQL** (migrated from MySQL), flush config: max_items=10000/max_age=7d/agents_autodelete=30d, DECISION_DURATION=168h in blocklist CronJob | | Frigate | GPU stall detection in liveness probe (inference speed check), high CPU | | Authentik | 3 replicas, PgBouncer in front of PostgreSQL, strip auth headers before forwarding | diff --git a/docs/architecture/compute.md b/docs/architecture/compute.md index 567ba5eb..fe27f730 100644 --- a/docs/architecture/compute.md +++ b/docs/architecture/compute.md @@ -330,10 +330,14 @@ label with it, and `null_resource.gpu_node_config` re-applies the next apply (discovery keyed on `feature.node.kubernetes.io/pci-10de.present=true`). -**GPU Workloads**: -- Ollama (LLM inference) -- ComfyUI (Stable Diffusion workflows) -- Stable Diffusion WebUI +**GPU Workloads** (time-sliced — node advertises `Tesla-T4-SHARED`, +`sharing-strategy=time-slicing`, `nvidia.com/gpu.replicas=100`, so many pods +share the single T4; request `nvidia.com/gpu: 1` for a slice, not the whole card): +- immich-machine-learning (CLIP smart-search + facial recognition, CUDA) +- immich-server (NVENC/NVDEC video transcoding — `ffmpeg.accel=nvenc` + `accelDecode=true`) +- Frigate (object-detection inference) +- llama-cpp / llama-swap (LLM inference) +- nvidia-exporter + gpu-pod-exporter (DCGM metrics) ## Configuration diff --git a/stacks/immich/frame.tf b/stacks/immich/frame.tf index e1a4471c..ab375aa4 100644 --- a/stacks/immich/frame.tf +++ b/stacks/immich/frame.tf @@ -96,8 +96,17 @@ resource "kubernetes_deployment" "immich-frame" { } } lifecycle { - # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2 - ignore_changes = [spec[0].template[0].spec[0].dns_config] + ignore_changes = [ + spec[0].template[0].spec[0].dns_config, # KYVERNO_LIFECYCLE_V1 + metadata[0].annotations["keel.sh/policy"], + metadata[0].annotations["keel.sh/trigger"], + 
metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2 + metadata[0].annotations["keel.sh/match-tag"], + metadata[0].annotations["kubernetes.io/change-cause"], + metadata[0].annotations["deployment.kubernetes.io/revision"], + spec[0].template[0].metadata[0].annotations["keel.sh/update-time"], # KEEL_LIFECYCLE_V1 + spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE + ] } } diff --git a/stacks/immich/main.tf b/stacks/immich/main.tf index 87fae5fa..183b3e50 100644 --- a/stacks/immich/main.tf +++ b/stacks/immich/main.tf @@ -145,7 +145,7 @@ resource "kubernetes_namespace" "immich" { # so this stack can own the tier-quota with a higher memory cap. "resource-governance/custom-quota" = "true" tier = local.tiers.gpu - "keel.sh/enrolled" = "true" + "keel.sh/enrolled" = "true" } } lifecycle { @@ -225,7 +225,7 @@ resource "kubernetes_deployment" "immich_server" { metadata[0].annotations["kubernetes.io/change-cause"], metadata[0].annotations["deployment.kubernetes.io/revision"], spec[0].template[0].metadata[0].annotations["keel.sh/update-time"], # KEEL_LIFECYCLE_V1 - spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE + spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE ] } @@ -256,6 +256,19 @@ resource "kubernetes_deployment" "immich_server" { } spec { + # Pinned to the GPU node for NVENC hardware video transcoding (Tesla T4, + # time-sliced). The immich-server image's ffmpeg ships h264/hevc_nvenc; + # activation is via system-config ffmpeg.accel=nvenc. 
+ priority_class_name = "gpu-workload" + node_selector = { + "nvidia.com/gpu.present" : "true" + } + toleration { + key = "nvidia.com/gpu" + operator = "Equal" + value = "true" + effect = "NoSchedule" + } container { name = "immich-server" image = "ghcr.io/immich-app/immich-server:${var.immich_version}" @@ -324,8 +337,8 @@ resource "kubernetes_deployment" "immich_server" { path = "/api/server/ping" port = "http" } - period_seconds = 10 - timeout_seconds = 1 + period_seconds = 10 + timeout_seconds = 1 # Bumped 30 → 360 (5min → 1h): after a PG restart, immich-server # reindexes the clip_index + face_index vector tables before binding # the API port. Hundreds of thousands of rows take longer than 5min @@ -371,7 +384,8 @@ resource "kubernetes_deployment" "immich_server" { memory = "8Gi" } limits = { - memory = "8Gi" + memory = "8Gi" + "nvidia.com/gpu" = "1" } } } @@ -462,7 +476,7 @@ resource "kubernetes_deployment" "immich-postgres" { metadata[0].annotations["keel.sh/policy"], metadata[0].annotations["keel.sh/trigger"], metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2 - spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE + spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE ] } @@ -636,7 +650,7 @@ resource "kubernetes_deployment" "immich-machine-learning" { metadata[0].annotations["kubernetes.io/change-cause"], metadata[0].annotations["deployment.kubernetes.io/revision"], spec[0].template[0].metadata[0].annotations["keel.sh/update-time"], # KEEL_LIFECYCLE_V1 - spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE + spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE ] }