[ci skip] Add HomeAssistantDown alert for ha-sofia

Fires after 5m if the haos Prometheus scrape target is unreachable.
Covers the HTTP API endpoint, which shares the same process as the
WebSocket API used by the mobile app.
This commit is contained in:
Viktor Barzin 2026-02-11 23:24:46 +00:00
parent 46ffc37dcf
commit cd5261161b

View file

@ -316,6 +316,13 @@ serverFiles:
summary: "Job {{ $labels.namespace }}/{{ $labels.job_name }}: {{ $value | printf \"%.0f\" }} failure(s)"
- name: Infrastructure Health
rules:
# HomeAssistantDown: fires when the `up` series for job "haos" has been 0
# continuously for 5 minutes.
# NOTE(review): `up{job="haos"} == 0` returns nothing when no series with that
# job label exists at all, so a removed scrape job would not trigger this
# alert. Confirm whether an absent() companion rule is wanted.
- alert: HomeAssistantDown
expr: up{job="haos"} == 0
# Hold time: the expression must stay true this long before the alert fires.
for: 5m
labels:
# NOTE(review): presumably routes to a paging receiver; confirm against the
# Alertmanager routing config.
severity: page
annotations:
# One-line summary; the template expands to the failing target's instance label.
summary: "Home Assistant down: {{ $labels.instance }}"
- alert: CoreDNSErrors
expr: rate(coredns_dns_responses_total{rcode="SERVFAIL"}[5m]) > 1
for: 10m