diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md
index 1df15f45..d579a35b 100755
--- a/.claude/CLAUDE.md
+++ b/.claude/CLAUDE.md
@@ -131,7 +131,7 @@ Repo IDs: infra=1, Website=2, finance=3, health=4, travel_blog=5, webhook-handle
 | Service | Key Operational Knowledge |
 |---------|--------------------------|
 | Nextcloud | MaxRequestWorkers=150, needs 8Gi limit (Apache transient memory spikes, see commit eb94144), very generous startup probe |
-| Immich | ML on SSD, disable ModSecurity (breaks streaming), CUDA for ML, frequent upgrades |
+| Immich | ML on SSD (CUDA), disable ModSecurity (breaks streaming), frequent upgrades. **Video transcoding is GPU-accelerated**: `immich-server` is pinned to GPU node1 (nodeSelector `nvidia.com/gpu.present=true` + NoSchedule toleration + `gpu-workload` priority) with a time-sliced `nvidia.com/gpu=1` slice — the stock immich-server image's ffmpeg already ships h264/hevc_nvenc + NVDEC. Activated via `ffmpeg.accel=nvenc` + `ffmpeg.accelDecode=true` in the **DB** system-config (`system_metadata` table, key `system-config`, JSONB — NOT Terraform; app config is DB-managed here like oauth/smtp). Direct DB edits need a pod **recreate** to reload (config is cached at boot; only API-driven changes broadcast a reload). If Immich is ever reinstalled fresh (not restored), re-set these two keys. Thumbnails/previews live on SSD NFS (sdb) — do NOT move to block storage (HDD sdc = slower + the contended IO domain). |
 | CrowdSec | Pin version, disable Metabase when not needed (CPU hog), LAPI scaled to 3, **DB on PostgreSQL** (migrated from MySQL), flush config: max_items=10000/max_age=7d/agents_autodelete=30d, DECISION_DURATION=168h in blocklist CronJob |
 | Frigate | GPU stall detection in liveness probe (inference speed check), high CPU |
 | Authentik | 3 replicas, PgBouncer in front of PostgreSQL, strip auth headers before forwarding |
diff --git a/docs/architecture/compute.md b/docs/architecture/compute.md
index 567ba5eb..fe27f730 100644
--- a/docs/architecture/compute.md
+++ b/docs/architecture/compute.md
@@ -330,10 +330,14 @@ label with it, and `null_resource.gpu_node_config` re-applies the
 next apply (discovery keyed on
 `feature.node.kubernetes.io/pci-10de.present=true`).
 
-**GPU Workloads**:
-- Ollama (LLM inference)
-- ComfyUI (Stable Diffusion workflows)
-- Stable Diffusion WebUI
+**GPU Workloads** (time-sliced — node advertises `Tesla-T4-SHARED`,
+`sharing-strategy=time-slicing`, `nvidia.com/gpu.replicas=100`, so many pods
+share the single T4; `nvidia.com/gpu: 1` requests one slice, not the whole card):
+- immich-machine-learning (CLIP smart-search + facial recognition, CUDA)
+- immich-server (NVENC/NVDEC video transcoding — `ffmpeg.accel=nvenc` + `ffmpeg.accelDecode=true`)
+- Frigate (object-detection inference)
+- llama-cpp / llama-swap (LLM inference)
+- nvidia-exporter + gpu-pod-exporter (DCGM metrics)
 
 ## Configuration
 
diff --git a/stacks/immich/frame.tf b/stacks/immich/frame.tf
index e1a4471c..ab375aa4 100644
--- a/stacks/immich/frame.tf
+++ b/stacks/immich/frame.tf
@@ -96,8 +96,17 @@ resource "kubernetes_deployment" "immich-frame" {
     }
   }
   lifecycle {
-    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
-    ignore_changes = [spec[0].template[0].spec[0].dns_config]
+    ignore_changes = [
+      spec[0].template[0].spec[0].dns_config, # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
+      metadata[0].annotations["keel.sh/policy"],
+      metadata[0].annotations["keel.sh/trigger"],
+      metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2
+      metadata[0].annotations["keel.sh/match-tag"],
+      metadata[0].annotations["kubernetes.io/change-cause"],
+      metadata[0].annotations["deployment.kubernetes.io/revision"],
+      spec[0].template[0].metadata[0].annotations["keel.sh/update-time"], # KEEL_LIFECYCLE_V1
+      spec[0].template[0].spec[0].container[0].image,                     # KEEL_IGNORE_IMAGE
+    ]
   }
 }
 
diff --git a/stacks/immich/main.tf b/stacks/immich/main.tf
index 87fae5fa..183b3e50 100644
--- a/stacks/immich/main.tf
+++ b/stacks/immich/main.tf
@@ -145,7 +145,7 @@ resource "kubernetes_namespace" "immich" {
       # so this stack can own the tier-quota with a higher memory cap.
       "resource-governance/custom-quota" = "true"
       tier                               = local.tiers.gpu
-      "keel.sh/enrolled" = "true"
+      "keel.sh/enrolled"                 = "true"
     }
   }
   lifecycle {
@@ -225,7 +225,7 @@ resource "kubernetes_deployment" "immich_server" {
       metadata[0].annotations["kubernetes.io/change-cause"],
       metadata[0].annotations["deployment.kubernetes.io/revision"],
       spec[0].template[0].metadata[0].annotations["keel.sh/update-time"], # KEEL_LIFECYCLE_V1
-      spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE
+      spec[0].template[0].spec[0].container[0].image,                     # KEEL_IGNORE_IMAGE
     ]
   }
 
@@ -256,6 +256,19 @@ resource "kubernetes_deployment" "immich_server" {
       }
 
       spec {
+        # Pinned to the GPU node for NVENC hardware video transcoding (Tesla T4,
+        # time-sliced). The immich-server image's ffmpeg ships h264/hevc_nvenc;
+        # activation is via system-config ffmpeg.accel=nvenc.
+        priority_class_name = "gpu-workload"
+        node_selector = {
+          "nvidia.com/gpu.present" = "true"
+        }
+        toleration {
+          key      = "nvidia.com/gpu"
+          operator = "Equal"
+          value    = "true"
+          effect   = "NoSchedule"
+        }
         container {
           name  = "immich-server"
           image = "ghcr.io/immich-app/immich-server:${var.immich_version}"
@@ -324,8 +337,8 @@ resource "kubernetes_deployment" "immich_server" {
               path = "/api/server/ping"
               port = "http"
             }
-            period_seconds    = 10
-            timeout_seconds   = 1
+            period_seconds  = 10
+            timeout_seconds = 1
             # Bumped 30 → 360 (5min → 1h): after a PG restart, immich-server
             # reindexes the clip_index + face_index vector tables before binding
             # the API port. Hundreds of thousands of rows take longer than 5min
@@ -371,7 +384,8 @@ resource "kubernetes_deployment" "immich_server" {
               memory = "8Gi"
             }
             limits = {
-              memory = "8Gi"
+              memory           = "8Gi"
+              "nvidia.com/gpu" = "1"
             }
           }
         }
@@ -462,7 +476,7 @@ resource "kubernetes_deployment" "immich-postgres" {
       metadata[0].annotations["keel.sh/policy"],
       metadata[0].annotations["keel.sh/trigger"],
       metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2
-      spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE
+      spec[0].template[0].spec[0].container[0].image,  # KEEL_IGNORE_IMAGE
     ]
   }
 
@@ -636,7 +650,7 @@ resource "kubernetes_deployment" "immich-machine-learning" {
       metadata[0].annotations["kubernetes.io/change-cause"],
       metadata[0].annotations["deployment.kubernetes.io/revision"],
       spec[0].template[0].metadata[0].annotations["keel.sh/update-time"], # KEEL_LIFECYCLE_V1
-      spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE
+      spec[0].template[0].spec[0].container[0].image,                     # KEEL_IGNORE_IMAGE
     ]
   }