diff --git a/stacks/chrome-service/main.tf b/stacks/chrome-service/main.tf index 30210808..d0db5c97 100644 --- a/stacks/chrome-service/main.tf +++ b/stacks/chrome-service/main.tf @@ -445,6 +445,10 @@ resource "kubernetes_deployment" "chrome_service" { # clobber to the novnc image stick (chromium-not-found crashloop 2026-06-16) # because TF could not revert the ignored field. Removed so TF re-asserts the # pinned image. Keel is inert (keel.sh/policy=never) and no deploy step touches these. + # NOTE: the LIVE pod's container order had drifted to [novnc, chrome-service, + # snapshot] vs this file's [chrome-service, novnc, snapshot]; a TF apply reorders + # them to match this file (harmless), so `containers[0]` differs between live and TF + # until the next apply lands — don't be alarmed reading it back mid-reconcile. spec[0].template[0].spec[0].init_container[0].image, metadata[0].annotations["kubernetes.io/change-cause"], metadata[0].annotations["deployment.kubernetes.io/revision"], diff --git a/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl b/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl index a86c832f..4ca6667c 100755 --- a/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl +++ b/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl @@ -2840,6 +2840,7 @@ serverFiles: annotations: summary: "MAM ratio is {{ $value | printf \"%.2f\" }} for 24h (target: >= 1.0)" - alert: MAMFarmingStuck + # Metric source: stacks/servarr/mam-farming/files/freeleech-grabber.py # Heartbeat-based: fires only when the grabber CronJob has not COMPLETED # a run in >4h (the original failure mode: Forbid-blocked / wedged in # ContainerCreating). The grabber heartbeats mam_grabber_last_run_timestamp