diff --git a/modules/kubernetes/descheduler/main.tf b/modules/kubernetes/descheduler/main.tf
index 435aa074..3e8cd967 100644
--- a/modules/kubernetes/descheduler/main.tf
+++ b/modules/kubernetes/descheduler/main.tf
@@ -91,6 +91,14 @@ resource "kubernetes_config_map" "policy" {
               "cpu" : 70
               "memory": 30
               "pods": 50
+      "HighNodeUtilization":
+        enabled: true
+        params:
+          nodeResourceUtilizationThresholds:
+            thresholds:
+              "cpu" : 20
+              "memory": 20
+              "pods": 20
       "PodLifeTime":
         enabled: true
         params:
diff --git a/modules/kubernetes/monitoring/prometheus_chart_values.tpl b/modules/kubernetes/monitoring/prometheus_chart_values.tpl
index 4c0a8c1c..1375e54c 100644
--- a/modules/kubernetes/monitoring/prometheus_chart_values.tpl
+++ b/modules/kubernetes/monitoring/prometheus_chart_values.tpl
@@ -183,7 +183,6 @@ serverFiles:
               severity: page
             annotations:
               summary: OpenWRT high memory usage. Can cause services getting stuck.
-
       - name: Mailserver Down
         rules:
           - alert: Mail server has no replicas available
@@ -229,6 +228,16 @@ serverFiles:
               severity: page
             annotations:
               summary: High system load on OpenWRT
+      - name: Monitor finance app exceptions
+        rules:
+          - alert: Monitor finance app exceptions
+            # Fires once webhook_failure has changed at least once within a 5m window and that has held for 1m.
+            expr: changes(webhook_failure[5m]) >= 1
+            for: 1m
+            labels:
+              severity: page
+            annotations:
+              summary: Finance app exception
 
 extraScrapeConfigs: |
   - job_name: 'snmp-idrac'