immich: bump server to 8Gi + override tier-2-gpu quota to 20Gi

Eliminates the OOM-on-face-detection-burst class of incidents (2026-04-26). The VPA upper bound for immich-server is 2.98Gi at steady state; the prior 4Gi limit was only 1.34x that bound, and the container was still SIGKILL'd when face-detection bursts pushed transient RSS past 4Gi. 8Gi gives 2.7x the VPA upper bound. The kyverno tier-2-gpu default quota caps requests.memory at 12Gi, which cannot fit 8Gi (server) + 3.5Gi (ML) + 3Gi (PG) + backup CronJobs (≈ 15.5Gi in total) simultaneously. This change therefore labels the namespace so that it matches the kyverno custom-quota exclude rule, and defines its own tier-quota with requests.memory = 20Gi (~4.5Gi headroom) — the same pattern as woodpecker/nvidia.
2026-04-26 20:02:28 +00:00 · 2026-04-26 20:02:28 +00:00 · 6ad5292128
commit 6ad5292128
parent d093aed7f6
1 changed files with 25 additions and 3 deletions
--- a/stacks/immich/main.tf
+++ b/stacks/immich/main.tf
@ -121,7 +121,10 @@ resource "kubernetes_namespace" "immich" {
  metadata {
    name = "immich"
    labels = {
-      tier = local.tiers.gpu
+      # Opts immich out of kyverno's `quota-tier-2-gpu` generation rule
+      # so this stack can own the tier-quota with a higher memory cap.
+      "resource-governance/custom-quota" = "true"
+      tier                               = local.tiers.gpu
    }
  }
  lifecycle {
@ -130,6 +133,25 @@ resource "kubernetes_namespace" "immich" {
  }
 }

+# Override the kyverno-generated tier-2-gpu quota (12Gi requests.memory).
+# immich-server needs 8Gi to absorb face-detection burst spikes (it was OOM-killed
+# on 2026-04-26). With immich-machine-learning (3.5Gi), immich-postgresql (3Gi) and
+# the backup CronJobs, requests total ≈ 15.5Gi; 20Gi leaves ~4.5Gi headroom.
+resource "kubernetes_resource_quota" "immich" {
+  metadata {
+    name      = "tier-quota" # NOTE(review): presumably the same name kyverno uses for its generated tier quotas — confirm
+    namespace = kubernetes_namespace.immich.metadata[0].name # resource reference (not a literal), so the quota depends on the namespace
+  }
+  spec {
+    hard = {
+      "requests.cpu"    = "8"
+      "requests.memory" = "20Gi" # raised from the 12Gi tier-2-gpu default
+      "limits.memory"   = "32Gi"
+      pods              = "40"
+    }
+  }
+}
+
 resource "kubernetes_manifest" "external_secret" {
  manifest = {
    apiVersion = "external-secrets.io/v1beta1"
@ -311,10 +333,10 @@ resource "kubernetes_deployment" "immich_server" {
          resources {
            requests = {
              cpu    = "100m"
-              memory = "4096Mi"
+              memory = "8Gi"
            }
            limits = {
-              memory = "4096Mi"
+              memory = "8Gi"
            }
          }
        }