[monitoring] Fix alerts for intentionally scaled-down services
PoisonFountainDown and ForwardAuthFallbackActive both fired because poison-fountain was intentionally scaled to 0 replicas. Both alert expressions now require kube_deployment_spec_replicas > 0 before alerting on missing available replicas: if the desired replica count is 0, the service is intentionally down and should not alert.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
a19581e32b
commit
7a884a0b97
1 changed file with 9 additions and 3 deletions
|
|
@@ -1369,7 +1369,9 @@ serverFiles:
|
|||
annotations:
|
||||
summary: "Authentik auth server has no available replicas"
|
||||
- alert: PoisonFountainDown
|
||||
expr: (kube_deployment_status_replicas_available{namespace="poison-fountain", deployment="poison-fountain"} or on() vector(0)) < 1
|
||||
expr: |
|
||||
kube_deployment_spec_replicas{namespace="poison-fountain", deployment="poison-fountain"} > 0
|
||||
and (kube_deployment_status_replicas_available{namespace="poison-fountain", deployment="poison-fountain"} or on() vector(0)) < 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
|
|
@@ -1606,8 +1608,12 @@ serverFiles:
|
|||
summary: "{{ $labels.service }} has {{ $value | printf \"%.0f\" }} open connections (threshold: 500)"
|
||||
- alert: ForwardAuthFallbackActive
|
||||
expr: |
|
||||
(kube_deployment_status_replicas_available{namespace="poison-fountain", deployment="poison-fountain"} or on() vector(0)) < 1
|
||||
or (kube_deployment_status_replicas_available{namespace="authentik", deployment="goauthentik-server"} or on() vector(0)) < 1
|
||||
(
|
||||
kube_deployment_spec_replicas{namespace="poison-fountain", deployment="poison-fountain"} > 0
|
||||
and (kube_deployment_status_replicas_available{namespace="poison-fountain", deployment="poison-fountain"} or on() vector(0)) < 1
|
||||
) or (
|
||||
kube_deployment_status_replicas_available{namespace="authentik", deployment="goauthentik-server"} or on() vector(0)
|
||||
) < 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue