From 7a884a0b977c83b1508bf409022074085ce6859b Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Fri, 17 Apr 2026 19:17:41 +0000 Subject: [PATCH] [monitoring] Fix alerts for intentionally scaled-down services MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PoisonFountainDown and ForwardAuthFallbackActive both fired because poison-fountain was scaled to 0 replicas (intentional). Updated both alert expressions to check kube_deployment_spec_replicas > 0 before alerting on missing available replicas — if desired replicas is 0, the service is intentionally down and should not alert. The guard is joined with `and on()` rather than a plain `and`: the `vector(0)` fallback carries no labels, so a label-matching `and` could never pair it with the labelled spec_replicas series and the "availability series absent" case would silently stop alerting. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../modules/monitoring/prometheus_chart_values.tpl | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl b/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl index f4859c7d..a1724da6 100755 --- a/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl +++ b/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl @@ -1369,7 +1369,9 @@ serverFiles: annotations: summary: "Authentik auth server has no available replicas" - alert: PoisonFountainDown - expr: (kube_deployment_status_replicas_available{namespace="poison-fountain", deployment="poison-fountain"} or on() vector(0)) < 1 + expr: | + kube_deployment_spec_replicas{namespace="poison-fountain", deployment="poison-fountain"} > 0 + and on() (kube_deployment_status_replicas_available{namespace="poison-fountain", deployment="poison-fountain"} or on() vector(0)) < 1 for: 5m labels: severity: warning @@ -1606,8 +1608,12 @@ serverFiles: summary: "{{ $labels.service }} has {{ $value | printf \"%.0f\" }} open connections (threshold: 500)" - alert: ForwardAuthFallbackActive expr: | - (kube_deployment_status_replicas_available{namespace="poison-fountain", deployment="poison-fountain"} or on() vector(0)) < 1 - or 
(kube_deployment_status_replicas_available{namespace="authentik", deployment="goauthentik-server"} or on() vector(0)) < 1 + ( + kube_deployment_spec_replicas{namespace="poison-fountain", deployment="poison-fountain"} > 0 + and on() (kube_deployment_status_replicas_available{namespace="poison-fountain", deployment="poison-fountain"} or on() vector(0)) < 1 + ) or ( + kube_deployment_status_replicas_available{namespace="authentik", deployment="goauthentik-server"} or on() vector(0) + ) < 1 for: 5m labels: severity: warning