[ci skip] Add HomeAssistantDown alert for ha-sofia

Fires after 5m if the haos Prometheus scrape target is unreachable.
Covers the HTTP API endpoint, which shares the same process as the
WebSocket API used by the mobile app.
This commit is contained in:
Viktor Barzin 2026-02-11 23:24:46 +00:00
parent b2d74a93a0
commit 8bea552664
No known key found for this signature in database
GPG key ID: 0EB088298288D958

View file

@ -316,6 +316,13 @@ serverFiles:
summary: "Job {{ $labels.namespace }}/{{ $labels.job_name }}: {{ $value | printf \"%.0f\" }} failure(s)"
- name: Infrastructure Health
rules:
# Alerts when the Prometheus scrape target of job "haos" (Home Assistant)
# has been reported down for a sustained period.
- alert: HomeAssistantDown
# `up` is the per-target metric Prometheus sets to 0 when a scrape fails.
# NOTE(review): `== 0` yields no result if the series is missing entirely
# (e.g. the target is removed from the scrape config), so that case does
# not fire this alert; confirm whether an absent() check is wanted.
expr: up{job="haos"} == 0
# The expression must hold continuously for 5 minutes before firing,
# which filters out short scrape blips and restarts.
for: 5m
labels:
# NOTE(review): how severity=page is routed depends on Alertmanager
# configuration that is not visible here.
severity: page
annotations:
# {{ $labels.instance }} is replaced with the `instance` label of the
# series that triggered the alert.
summary: "Home Assistant down: {{ $labels.instance }}"
- alert: CoreDNSErrors
expr: rate(coredns_dns_responses_total{rcode="SERVFAIL"}[5m]) > 1
for: 10m