add counter for overall webhook failures

This commit is contained in:
viktorbarzin 2023-04-03 22:37:59 +01:00
parent a3fe565ec5
commit a93aa03f72
2 changed files with 17 additions and 1 deletions

View file

@ -91,6 +91,14 @@ resource "kubernetes_config_map" "policy" {
"cpu" : 70
"memory": 30
"pods": 50
"HighNodeUtilization"
enabled: true
params:
nodeResourceUtilizationThresholds:
thresholds:
"cpu" : 20
"memory": 20
"pods": 20
"PodLifeTime":
enabled: true
params:

View file

@ -183,7 +183,6 @@ serverFiles:
severity: page
annotations:
summary: OpenWRT high memory usage. Can cause services getting stuck.
- name: Mailserver Down
rules:
- alert: Mail server has no replicas available
@ -229,6 +228,15 @@ serverFiles:
severity: page
annotations:
summary: High system load on OpenWRT
# Rule group holding the single alert for finance app exceptions.
- name: 'Monitor finance app exceptions'
  rules:
    - alert: 'Monitor finance app exceptions'
      # Condition: the webhook_failure series changed value at least once
      # within the trailing 5-minute window.
      expr: 'changes(webhook_failure[5m]) >= 1'
      # The condition must hold for 1 minute before the alert fires.
      for: '1m'
      labels:
        severity: 'page'
      annotations:
        summary: 'Finance app exception'
extraScrapeConfigs: |
- job_name: 'snmp-idrac'