From 0b1282a13cb963dcb0a0443745ae4424b578e3ad Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sun, 24 May 2026 09:01:17 +0000 Subject: [PATCH] llama-cpp: ignore_changes for keel/k8s-managed annotations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every `tg apply` was reverting the annotations that keel patches when it detects an upstream digest change — `keel.sh/match-tag` (Kyverno-stamped), `keel.sh/update-time` (on the pod template; what actually triggers the rollout), plus the K8s-managed `kubernetes.io/change-cause` and `deployment.kubernetes.io/revision`. The revert forced a rollout, then the next keel poll re-stamped the annotations, forcing another. With llama-swap's ~10s cold-load on each pod recreate the user noticed. Upstream `ghcr.io/mostlygeek/llama-swap:cuda` is a moving nightly tag — keel still drives one legitimate rollout per day at ~07:25 UTC; this patch stops the apply-driven extra rollouts on top of that. Co-Authored-By: Claude Opus 4.7 --- stacks/llama-cpp/main.tf | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/stacks/llama-cpp/main.tf b/stacks/llama-cpp/main.tf index 13b3cb3b..3d3c5ac4 100644 --- a/stacks/llama-cpp/main.tf +++ b/stacks/llama-cpp/main.tf @@ -373,10 +373,22 @@ resource "kubernetes_deployment" "llama_swap" { lifecycle { ignore_changes = [ spec[0].template[0].spec[0].dns_config, # KYVERNO_LIFECYCLE_V1 + metadata[0].annotations["keel.sh/match-tag"], metadata[0].annotations["keel.sh/policy"], metadata[0].annotations["keel.sh/trigger"], metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2 spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE + # KEEL_LIFECYCLE_V1 — stop the apply→keel fight: every keel digest + # update patches `keel.sh/update-time` on the pod template and + # `kubernetes.io/change-cause` + bumps the K8s rollout revision on + # the Deployment. Without these ignore_changes, every `tg apply` + # reverts those, forcing a rollout, which keel then re-patches on + # the next 1h poll → llama-swap was rolling several times a day + # (~10s model-load downtime each). Upstream :cuda nightly cadence + # still triggers a legitimate daily rollout. + metadata[0].annotations["kubernetes.io/change-cause"], + metadata[0].annotations["deployment.kubernetes.io/revision"], + spec[0].template[0].metadata[0].annotations["keel.sh/update-time"], ] }