From 5d0e17b5bacef14bc21c621a4592f3762afbe507 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sun, 10 May 2026 19:33:11 +0000 Subject: [PATCH] k8s-version-upgrade: detection script refresh apt before madison + DRY_RUN_OVERRIDE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Test 2 dry-run revealed that `kubeadm upgrade plan` reported v1.34.7 as latest while `apt-cache madison` (without a prior `apt-get update`) was still reporting v1.34.5 — so the CronJob would have dispatched the agent against a stale target. The detection script now runs `sudo apt-get update -qq` for just the kubernetes repo before querying madison. Also add DRY_RUN_OVERRIDE, an env var that takes precedence over DRY_RUN, so future test invocations can override DRY_RUN without an apply cycle — but Job spec env is immutable post-create, so this is only useful for CronJob spec edits (suspend, then add env, then resume). Documented in the runbook. --- stacks/k8s-version-upgrade/main.tf | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/stacks/k8s-version-upgrade/main.tf b/stacks/k8s-version-upgrade/main.tf index 321136b1..29652ca3 100644 --- a/stacks/k8s-version-upgrade/main.tf +++ b/stacks/k8s-version-upgrade/main.tf @@ -362,8 +362,12 @@ resource "kubernetes_cron_job_v1" "k8s_version_check" { echo "Running version: v$RUNNING (minor $RUNNING_MINOR)" # 3. Detect highest available patch within the running minor track. + # Refresh the local apt cache first — without this, a newly-published + # patch won't show up via `apt-cache madison` until something else + # triggers an `apt-get update`. LATEST_PATCH=$($SSH wizard@k8s-master \ - "apt-cache madison kubeadm 2>/dev/null \ + "sudo apt-get update -qq -o Dir::Etc::sourcelist='sources.list.d/kubernetes.list' -o Dir::Etc::sourceparts='-' -o APT::Get::List-Cleanup='0' >/dev/null 2>&1 ; \ + apt-cache madison kubeadm 2>/dev/null \ | awk '{print \$3}' \ | sed 's/-.*//' \ | grep '^$RUNNING_MINOR\\.' 
\ @@ -421,8 +425,14 @@ resource "kubernetes_cron_job_v1" "k8s_version_check" { slack "K8s upgrade available: v$RUNNING → v$TARGET ($KIND)" - if [ "$DRY_RUN" = "true" ]; then - echo "DRY_RUN=true — not POSTing to claude-agent-service" + # DRY_RUN_OVERRIDE wins over DRY_RUN — but a Job copied from + # this CronJob can't add new env vars (spec is immutable). The + # operator path for "trigger detection without dispatch" is + # toggling the CronJob's `var.detection_dry_run` then applying. + # Documented in the runbook. + EFFECTIVE_DRY_RUN="$${DRY_RUN_OVERRIDE:-$DRY_RUN}" + if [ "$EFFECTIVE_DRY_RUN" = "true" ]; then + echo "dry_run=true — not POSTing to claude-agent-service" slack "DRY_RUN — skipping agent dispatch" exit 0 fi