From 6ad5292128ff58a549ab227550ca2fbf93869205 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sun, 26 Apr 2026 20:02:28 +0000 Subject: [PATCH] immich: bump server to 8Gi + override tier-2-gpu quota to 20Gi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Eliminates the OOM-on-face-detection-burst class of incidents (2026-04-26). VPA upper for immich-server is 2.98Gi steady-state; the prior 4Gi limit was 1.34x upper and still got SIGKILL'd when face-detection bursts pushed transient RSS past 4Gi. 8Gi gives 2.7x VPA upper headroom. The kyverno tier-2-gpu default quota is 12Gi requests.memory which can't fit 8Gi (server) + 3.5Gi (ML) + 3Gi (PG) + backup CronJobs simultaneously. Opts the namespace into the kyverno custom-quota exclude rule and overrides with 20Gi (~4.5Gi headroom) — same pattern as woodpecker/nvidia. --- stacks/immich/main.tf | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/stacks/immich/main.tf b/stacks/immich/main.tf index 3ee56d1f..0555b653 100644 --- a/stacks/immich/main.tf +++ b/stacks/immich/main.tf @@ -121,7 +121,10 @@ resource "kubernetes_namespace" "immich" { metadata { name = "immich" labels = { - tier = local.tiers.gpu + # Opts immich out of kyverno's `quota-tier-2-gpu` generation rule + # so this stack can own the tier-quota with a higher memory cap. + "resource-governance/custom-quota" = "true" + tier = local.tiers.gpu } } lifecycle { @@ -130,6 +133,25 @@ resource "kubernetes_namespace" "immich" { } } +# Override the kyverno-generated tier-2-gpu quota (12Gi requests.memory). +# immich-server requests 8Gi to absorb face-detection burst spikes (OOM incident +# 2026-04-26). With immich-machine-learning (3.5Gi) + immich-postgresql (3Gi) + +# backup CronJobs the namespace totals ≈ 15.5Gi; 20Gi leaves ~4.5Gi headroom. 
+resource "kubernetes_resource_quota" "immich" { + metadata { + name = "tier-quota" + namespace = kubernetes_namespace.immich.metadata[0].name + } + spec { + hard = { + "requests.cpu" = "8" + "requests.memory" = "20Gi" + "limits.memory" = "32Gi" + pods = "40" + } + } +} + resource "kubernetes_manifest" "external_secret" { manifest = { apiVersion = "external-secrets.io/v1beta1" @@ -311,10 +333,10 @@ resource "kubernetes_deployment" "immich_server" { resources { requests = { cpu = "100m" - memory = "4096Mi" + memory = "8Gi" } limits = { - memory = "4096Mi" + memory = "8Gi" } } }