From 0b1282a13cb963dcb0a0443745ae4424b578e3ad Mon Sep 17 00:00:00 2001
From: Viktor Barzin <vbarzin@gmail.com>
Date: Sun, 24 May 2026 09:01:17 +0000
Subject: [PATCH] llama-cpp: ignore_changes for keel/k8s-managed annotations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Every `tg apply` was reverting the annotations that keel patches when it
detects an upstream digest change — `keel.sh/match-tag` (Kyverno-stamped),
`keel.sh/update-time` (on the pod template; what actually triggers the
rollout), plus the K8s-managed `kubernetes.io/change-cause` and
`deployment.kubernetes.io/revision`. The revert forced a rollout, then
the next keel poll re-stamped the annotations, forcing another. Because
each pod recreation costs llama-swap a ~10s cold-load, the user noticed.

Upstream `ghcr.io/mostlygeek/llama-swap:cuda` is a moving nightly tag —
keel still drives one legitimate rollout per day at ~07:25 UTC; this
patch stops the apply-driven extra rollouts on top of that.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 stacks/llama-cpp/main.tf | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/stacks/llama-cpp/main.tf b/stacks/llama-cpp/main.tf
index 13b3cb3b..3d3c5ac4 100644
--- a/stacks/llama-cpp/main.tf
+++ b/stacks/llama-cpp/main.tf
@@ -373,10 +373,22 @@ resource "kubernetes_deployment" "llama_swap" {
   lifecycle {
     ignore_changes = [
       spec[0].template[0].spec[0].dns_config, # KYVERNO_LIFECYCLE_V1
+      metadata[0].annotations["keel.sh/match-tag"],
       metadata[0].annotations["keel.sh/policy"],
       metadata[0].annotations["keel.sh/trigger"],
       metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2
       spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE
+      # KEEL_LIFECYCLE_V1 — stop the apply→keel fight. On every digest
+      # update keel patches `keel.sh/update-time` on the pod template
+      # (this is what triggers the rollout), and the Deployment gains
+      # `kubernetes.io/change-cause` plus a bumped K8s rollout revision.
+      # Without the three entries below, every `tg apply` reverted those
+      # and forced a rollout, which keel re-patched on its next 1h poll,
+      # so llama-swap rolled several times a day (~10s model-load downtime
+      # each). The upstream :cuda nightly still causes one daily rollout.
+      metadata[0].annotations["kubernetes.io/change-cause"],
+      metadata[0].annotations["deployment.kubernetes.io/revision"],
+      spec[0].template[0].metadata[0].annotations["keel.sh/update-time"],
     ]
   }