From 7120a80696660d9fb3519c320ecd659229a1d4ac Mon Sep 17 00:00:00 2001
From: viktorbarzin
Date: Wed, 10 Feb 2021 23:14:09 +0000
Subject: [PATCH] fix typo from templating which caused missing metrics and add
 alerts to prevent that from happening again

---
Reviewer notes (everything between the "---" line and the diff is ignored
when the patch is applied):

* The relabel replacements are written as `$${1}` so that the template
  renderer emits a literal `${1}` capture-group reference instead of trying
  to interpolate it; the unescaped form is the typo named in the subject.
* NoNodeLoadData uses absent(node_load1). Comparing the load value to 0
  would also page for an idle node whose 1-minute load really is 0.
* NOTE(review): leading whitespace of the YAML lines was reconstructed from
  a whitespace-collapsed copy of this patch - confirm it matches the target
  file before applying. The "index" line still carries the blob ids of the
  original commit.

 .../monitoring/prometheus_chart_values.tpl | 34 ++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/modules/kubernetes/monitoring/prometheus_chart_values.tpl b/modules/kubernetes/monitoring/prometheus_chart_values.tpl
index 7c32778c..eeed1d6a 100644
--- a/modules/kubernetes/monitoring/prometheus_chart_values.tpl
+++ b/modules/kubernetes/monitoring/prometheus_chart_values.tpl
@@ -134,6 +134,36 @@ serverFiles:
               severity: page
             annotations:
               summary: Power voltage on a power supply is critically low indicating power outage.
+      - name: HighPowerUsage
+        rules:
+          # Missing data falls back to 0 via vector(0), so only a real reading can exceed the threshold.
+          - alert: HighPowerUsage
+            expr: (max(r730_idrac_amperageProbeReading) or on() vector(0)) > 112
+            for: 30m
+            labels:
+              severity: page
+            annotations:
+              summary: High Power usage. Baseline is 112W
+      - name: NoNodeLoadData
+        rules:
+          # absent() fires only when no node_load1 series exists; an idle node reporting 0 does not page.
+          - alert: NoNodeLoadData
+            expr: absent(node_load1)
+            for: 10m
+            labels:
+              severity: page
+            annotations:
+              summary: No node load data. Can signal that prometheus is not scraping
+      - name: NoiDRACData
+        rules:
+          # vector(0) stands in for a missing metric, so == 0 catches both no data and an all-zero reading.
+          - alert: NoiDRACData
+            expr: (max(r730_idrac_amperageProbeReading) or on() vector(0)) == 0
+            for: 10m
+            labels:
+              severity: page
+            annotations:
+              summary: No iDRAC amperage reading. Can signal that prometheus is not scraping
 
 extraScrapeConfigs: |
   - job_name: 'snmp-idrac'
@@ -155,7 +185,7 @@ extraScrapeConfigs: |
         target_label: '__name__'
         action: replace
         regex: '(.*)'
-        replacement: 'r730_idrac_${1}'
+        replacement: 'r730_idrac_$${1}'
   - job_name: 'openwrt'
     static_configs:
       - targets:
@@ -173,4 +203,4 @@ extraScrapeConfigs: |
         target_label: '__name__'
         action: replace
         regex: '(.*)'
-        replacement: 'openwrt_${1}'
+        replacement: 'openwrt_$${1}'