[ci skip] Add HomeAssistantDown alert for ha-sofia
Fires after 5m if the haos Prometheus scrape target is unreachable. Covers the HTTP API endpoint which shares the same process as the WebSocket API used by the mobile app.
This commit is contained in:
parent
46ffc37dcf
commit
cd5261161b
1 changed files with 7 additions and 0 deletions
|
|
@ -316,6 +316,13 @@ serverFiles:
|
|||
summary: "Job {{ $labels.namespace }}/{{ $labels.job_name }}: {{ $value | printf \"%.0f\" }} failure(s)"
|
||||
- name: Infrastructure Health
|
||||
rules:
|
||||
- alert: HomeAssistantDown
|
||||
expr: up{job="haos"} == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: page
|
||||
annotations:
|
||||
summary: "Home Assistant down: {{ $labels.instance }}"
|
||||
- alert: CoreDNSErrors
|
||||
expr: rate(coredns_dns_responses_total{rcode="SERVFAIL"}[5m]) > 1
|
||||
for: 10m
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue