[ci skip] Add HomeAssistantDown alert for ha-sofia
Fires after 5 minutes if the haos Prometheus scrape target is unreachable. This covers the HTTP API endpoint, which runs in the same process as the WebSocket API used by the mobile app.
This commit is contained in:
parent
b2d74a93a0
commit
8bea552664
1 changed file with 7 additions and 0 deletions
|
|
@@ -316,6 +316,13 @@ serverFiles:
|
|||
summary: "Job {{ $labels.namespace }}/{{ $labels.job_name }}: {{ $value | printf \"%.0f\" }} failure(s)"
|
||||
- name: Infrastructure Health
|
||||
rules:
|
||||
- alert: HomeAssistantDown
|
||||
expr: up{job="haos"} == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: page
|
||||
annotations:
|
||||
summary: "Home Assistant down: {{ $labels.instance }}"
|
||||
- alert: CoreDNSErrors
|
||||
expr: rate(coredns_dns_responses_total{rcode="SERVFAIL"}[5m]) > 1
|
||||
for: 10m
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue