[monitoring] Fix alerts for intentionally scaled-down services

PoisonFountainDown and ForwardAuthFallbackActive both fired because
poison-fountain was intentionally scaled to 0 replicas. Updated both
alert expressions to require kube_deployment_spec_replicas > 0 for
poison-fountain before alerting on missing available replicas — if the
desired replica count is 0, the service is intentionally down and
should not alert.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-04-17 19:17:41 +00:00
parent a19581e32b
commit 7a884a0b97

View file

@ -1369,7 +1369,9 @@ serverFiles:
annotations:
summary: "Authentik auth server has no available replicas"
- alert: PoisonFountainDown
expr: (kube_deployment_status_replicas_available{namespace="poison-fountain", deployment="poison-fountain"} or on() vector(0)) < 1
expr: |
kube_deployment_spec_replicas{namespace="poison-fountain", deployment="poison-fountain"} > 0
and (kube_deployment_status_replicas_available{namespace="poison-fountain", deployment="poison-fountain"} or on() vector(0)) < 1
for: 5m
labels:
severity: warning
@ -1606,8 +1608,12 @@ serverFiles:
summary: "{{ $labels.service }} has {{ $value | printf \"%.0f\" }} open connections (threshold: 500)"
- alert: ForwardAuthFallbackActive
expr: |
(kube_deployment_status_replicas_available{namespace="poison-fountain", deployment="poison-fountain"} or on() vector(0)) < 1
or (kube_deployment_status_replicas_available{namespace="authentik", deployment="goauthentik-server"} or on() vector(0)) < 1
(
kube_deployment_spec_replicas{namespace="poison-fountain", deployment="poison-fountain"} > 0
and (kube_deployment_status_replicas_available{namespace="poison-fountain", deployment="poison-fountain"} or on() vector(0)) < 1
) or (
kube_deployment_status_replicas_available{namespace="authentik", deployment="goauthentik-server"} or on() vector(0)
) < 1
for: 5m
labels:
severity: warning