From bc486227f7d91c7602e0e74404c09a5f55a02d27 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sun, 14 Dec 2025 09:48:59 +0000 Subject: [PATCH] add separate idrac monitoring tool and dashboard [ci skip] --- .../monitoring/dashboards/idrac.json | 4243 ++++++++++------- .../monitoring/dashboards/nvidia.json | 710 +++ modules/kubernetes/monitoring/main.tf | 19 +- .../monitoring/prometheus_chart_values.tpl | 35 +- 4 files changed, 3310 insertions(+), 1697 deletions(-) create mode 100644 modules/kubernetes/monitoring/dashboards/nvidia.json diff --git a/modules/kubernetes/monitoring/dashboards/idrac.json b/modules/kubernetes/monitoring/dashboards/idrac.json index 869fe504..b0e3e9e5 100644 --- a/modules/kubernetes/monitoring/dashboards/idrac.json +++ b/modules/kubernetes/monitoring/dashboards/idrac.json @@ -2,170 +2,587 @@ "annotations": { "list": [ { - "$$hashKey": "object:208", "builtIn": 1, "datasource": { - "type": "datasource", - "uid": "grafana" + "type": "grafana", + "uid": "-- Grafana --" }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, "type": "dashboard" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "enable": true, + "expr": "last_over_time(r730_idrac_idrac_events_log_entry{job=~\"$job\", instance=~\"$instance\"}[$__interval] offset -$__interval) * 1000 < ${__to}\nand \nlast_over_time(r730_idrac_idrac_events_log_entry{job=~\"$job\", instance=~\"$instance\"}[$__interval] offset -$__interval) * 1000 > ${__from}", + "iconColor": "purple", + "name": "Events", + "tagKeys": "instance,severity", + "textFormat": "{{ message }}", + "titleFormat": "{{ id }}", + "useValueForTime": "on" } ] }, - "description": "SNMP Based Dashboard to Monitor Dell Hosts via iDRAC - by ilovepancakes95", "editable": true, "fiscalYearStartMonth": 0, - "gnetId": 12106, "graphTooltip": 0, - "id": 16, - "links": [], + "id": 28, + "links": [ + { + "asDropdown": false, + "icon": "dashboard", + "includeVars": false, + "keepTime": true, + "tags": [], + "targetBlank": false, + "title": "BMC overview", + "tooltip": "", + "type": "link", + "url": "d/3faidY24k" + } + ], + "liveNow": false, "panels": [ { "collapsed": false, - "datasource": { - "uid": "${DS_PROMETHEUS-01}" - }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, - "id": 182, + "id": 2, "panels": [], - "targets": [ - { - "datasource": { - "uid": "${DS_PROMETHEUS-01}" - }, - "refId": "A" - } - ], - "title": "iDRAC Summary", + "title": "Information", "type": "row" }, { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 0, - "y": 1 - }, - "id": 8, - "maxDataPoints": 100, - "options": { - "code": { - "language": "plaintext", - "showLineNumbers": false, - "showMiniMap": false - }, - "content": "GCFSDN2", - "mode": "markdown" - }, - "pluginVersion": "11.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "", - "format": "table", - "groupBy": [], - "interval": "", - "legendFormat": "", - "measurement": "idrac-hosts", - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"system-servicetag\" FROM /^$idrac_host$/ WHERE (\"system-name\" =~ /^$idrac_host$/) AND $timeFilter", - "rawQuery": false, - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "system-servicetag" - ], - "type": "field" - } - ] - ], - "tags": [ - { - "key": "system-name", - "operator": "=~", - "value": "/^$idrac_host$/" - } - ] - } - ], - "title": "Service Tag", - "type": "text" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "fieldConfig": { "defaults": { "mappings": [ { "options": { - "1": { - "text": "Other" + "Critical": { + "color": "light-red", + "index": 2 + }, + "OK": { + "color": "light-green", + "index": 3 + }, + "Warning": { + "color": "light-yellow", + "index": 1 } }, "type": "value" }, { "options": { - "2": { - "text": "Unknown" + "match": "null", + "result": { + "index": 0, + "text": "N/A" } }, - "type": "value" - }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 0, + "y": 1 + }, + "hideTimeOverride": true, + "id": 8, + "interval": "1m", + "maxDataPoints": 1, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^status$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "alias": "", + "bucketAggs": [ + { + "field": "@timestamp", + "id": "2", + "settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "r730_idrac_idrac_system_health{instance=~\"$instance\", job=~\"$job\"}", + "format": "table", + "instant": true, + "legendFormat": "{{status}}", + "metrics": [ + { + "id": "1", + "type": "count" + } + ], + "query": "", + "range": false, + "refId": "A", + "timeField": "@timestamp" + } + ], + "timeFrom": "1m", + "title": "Health", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ { "options": { - "3": { - "text": "OK" + "match": "null", + "result": { + "index": 0, + "text": "N/A" } }, - "type": "value" + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 1 + }, + "hideTimeOverride": true, + "id": 33, + "interval": "1m", + "maxDataPoints": 1, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "name", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "alias": "", + "bucketAggs": [ + { + "field": "@timestamp", + "id": "2", + "settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "r730_idrac_idrac_system_bios_info{instance=\"$instance\", job=~\"$job\"}", + "format": "time_series", + "instant": true, + "legendFormat": "{{version}}", + "metrics": [ + { + "id": "1", + "type": "count" + } + ], + "query": "", + "range": false, + "refId": "A", + "timeField": "@timestamp" + } + ], + "timeFrom": "1m", + "title": "BIOS version", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Capacity" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + }, + { + "id": "custom.width", + "value": 92 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "status" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "type": "color-text" + } + }, + { + "id": "mappings", + "value": [ + { + "options": { + "Critical": { + "color": "light-red", + "index": 2 + }, + "OK": { + "color": "light-green", + "index": 0 + }, + "Warning": { + "color": "light-yellow", + "index": 1 + } + }, + "type": "value" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #SPEED" + }, + "properties": [ + { + "id": "unit", + "value": "mHz" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "type" + }, + "properties": [ + { + "id": "custom.width", + "value": 81 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "rank" + }, + "properties": [ + { + "id": "custom.width", + "value": 86 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "serial" + }, + "properties": [ + { + "id": "custom.width", + "value": 105 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "name" + }, + "properties": [ + { + "id": "custom.width", + "value": 107 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Speed" + }, + "properties": [ + { + "id": "custom.width", + "value": 129 + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 32, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": false, + "displayName": "name" + } + ] + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "r730_idrac_idrac_memory_module_info{job=~\"$job\", instance=~\"$instance\"}", + "format": "table", + "instant": false, + "range": true, + "refId": "INFO" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "r730_idrac_idrac_memory_module_health{job=~\"$job\", instance=~\"$instance\"}", + "format": "table", + "hide": false, + "instant": false, + "range": true, + "refId": "HEALTH" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "r730_idrac_idrac_memory_module_capacity_bytes{job=~\"$job\", instance=~\"$instance\"}", + "format": "table", + "hide": false, + "instant": false, + "range": true, + "refId": "CAPACITY" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "r730_idrac_idrac_memory_module_speed_mhz{job=~\"$job\", instance=~\"$instance\"}", + "format": "table", + "hide": false, + "instant": false, + "range": true, + "refId": "SPEED" + } + ], + "title": "Memory", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "id", + "mode": "outer" + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "pattern": "(ecc|name|type|manufacturer|rank|mediatype|model|protocol|serial|slot|status|Value #CAPACITY|Value #SPEED)" + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Value #CAPACITY": 6, + "Value #SPEED": 7, + "ecc": 3, + "manufacturer": 1, + "name": 0, + "rank": 4, + "serial": 2, + "status": 8, + "type": 5 + }, + "renameByName": { + "Value #CAPACITY": "Capacity", + "Value #SPEED": "Speed" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "mappings": [ { "options": { - "4": { - "text": "Non-Critical Fail" + "match": "null", + "result": { + "text": "N/A" } }, - "type": "value" - }, - { - "options": { - "5": { - "text": "Critical Fail" - } - }, - "type": "value" - }, - { - "options": { - "6": { - "text": "Non-Recoverable Fail" - } - }, - "type": "value" + "type": "special" } ], "thresholds": { @@ -183,14 +600,14 @@ }, "gridPos": { "h": 2, - "w": 2, - "x": 2, - "y": 1 + "w": 4, + "x": 0, + "y": 5 }, - "id": 406, - "maxDataPoints": 100, + "hideTimeOverride": true, + "id": 4, "options": { - "colorMode": "background", + "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "horizontal", @@ -199,110 +616,69 @@ "calcs": [ "lastNotNull" ], - "fields": "", + "fields": "/^manufacturer$/", "values": false }, "showPercentChange": false, - "textMode": "auto", + "text": {}, + "textMode": "value", "wideLayout": true }, "pluginVersion": "11.1.0", "targets": [ { + "alias": "", + "bucketAggs": [ + { + "field": "@timestamp", + "id": "2", + "settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "editorMode": "code", - "expr": "r730_idrac_redfish_system_processor_total_threads", - "groupBy": [], - "interval": "", - "legendFormat": "", - "measurement": "idrac-hosts", - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"power-state\" FROM \"esxi-01-idrac\" WHERE $timeFilter", - "range": true, - "rawQuery": false, - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "memory-status" - ], - "type": "field" - } - ] - ], - "tags": [ + "exemplar": false, + "expr": "r730_idrac_idrac_system_machine_info{instance=\"$instance\", job=~\"$job\"}", + "format": "table", + "instant": true, + "legendFormat": "{{manufacturer}}", + "metrics": [ { - "key": "system-name", - "operator": "=~", - "value": "/^$idrac_host$/" + "id": "1", + "type": "count" } - ] + ], + "query": "", + "range": false, + "refId": "A", + "timeField": "@timestamp" } ], - "title": "Total Threads", + "title": "Vendor", "type": "stat" }, { "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "type": "datasource", + "uid": "-- Dashboard --" }, "fieldConfig": { "defaults": { "mappings": [ { "options": { - "1": { - "text": "Other" + "match": "null", + "result": { + "text": "N/A" } }, - "type": "value" - }, - { - "options": { - "2": { - "text": "Unknown" - } - }, - "type": "value" - }, - { - "options": { - "3": { - "text": "OK" - } - }, - "type": "value" - }, - { - "options": { - "4": { - "text": "Non-Critical Fail" - } - }, - "type": "value" - }, - { - "options": { - "5": { - "text": "Critical Fail" - } - }, - "type": "value" - }, - { - "options": { - "6": { - "text": "Non-Recoverable Fail" - } - }, - "type": "value" + "type": "special" } ], "thresholds": { @@ -314,20 +690,20 @@ } ] }, - "unit": "decmbytes" + "unit": "none" }, "overrides": [] }, "gridPos": { "h": 2, - "w": 2, + "w": 4, "x": 4, - "y": 1 + "y": 5 }, - "id": 407, - "maxDataPoints": 100, + "hideTimeOverride": true, + "id": 6, "options": { - "colorMode": "background", + "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "horizontal", @@ -336,112 +712,36 @@ "calcs": [ "lastNotNull" ], - "fields": "", + "fields": "/^serial$/", "values": false }, "showPercentChange": false, - "textMode": "auto", + "text": {}, + "textMode": "value", "wideLayout": true }, "pluginVersion": "11.1.0", "targets": [ { "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "type": "datasource", + "uid": "-- Dashboard --" }, - "editorMode": "code", - "expr": "sum(r730_idrac_redfish_system_memory_capacity)", - "groupBy": [], - "interval": "", - "legendFormat": "", - "measurement": "idrac-hosts", - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"power-state\" FROM \"esxi-01-idrac\" WHERE $timeFilter", - "range": true, - "rawQuery": false, - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "temp-status" - ], - "type": "field" - } - ] - ], - "tags": [ - { - "key": "system-name", - "operator": "=~", - "value": "/^$idrac_host$/" - } - ] + "panelId": 4, + "refId": "A" } ], - "title": "Total Memory", + "title": "Serial Number", "type": "stat" }, { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "1": { - "text": "Other" - } - }, - "type": "value" - }, - { - "options": { - "2": { - "text": "Unknown" - } - }, - "type": "value" - }, - { - "options": { - "3": { - "text": "OK" - } - }, - "type": "value" - }, - { - "options": { - "4": { - "text": "Non-Critical Fail" - } - }, - "type": "value" - }, - { - "options": { - "5": { - "text": "Critical Fail" - } - }, - "type": "value" - }, - { - "options": { - "6": { - "text": "Non-Recoverable Fail" - } - }, - "type": "value" - } - ], + "mappings": [], "thresholds": { "mode": "absolute", "steps": [ @@ -457,14 +757,442 @@ }, "gridPos": { "h": 2, - "w": 2, - "x": 6, - "y": 1 + "w": 4, + "x": 8, + "y": 5 }, - "id": 405, - "maxDataPoints": 100, + "hideTimeOverride": true, + "id": 20, + "interval": "1m", + "maxDataPoints": 1, "options": { - "colorMode": "background", + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "alias": "", + "bucketAggs": [ + { + "field": "@timestamp", + "id": "2", + "settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "r730_idrac_idrac_system_memory_size_bytes{instance=\"$instance\", job=~\"$job\"}", + "format": "time_series", + "instant": true, + "interval": "", + "legendFormat": "", + "metrics": [ + { + "id": "1", + "type": "count" + } + ], + "query": "", + "range": false, + "refId": "A", + "timeField": "@timestamp" + } + ], + "timeFrom": "1m", + "title": "Memory Size", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 4, + "x": 0, + "y": 7 + }, + "hideTimeOverride": true, + "id": 5, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^model$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 4, + "refId": "A" + } + ], + "title": "Model", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 4, + "x": 4, + "y": 7 + }, + "hideTimeOverride": true, + "id": 7, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^sku$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Dashboard --" + }, + "panelId": 4, + "refId": "A" + } + ], + "title": "SKU", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 4, + "x": 8, + "y": 7 + }, + "hideTimeOverride": true, + "id": 21, + "interval": "1m", + "maxDataPoints": 1, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "alias": "", + "bucketAggs": [ + { + "field": "@timestamp", + "id": "2", + "settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "r730_idrac_idrac_system_cpu_count{instance=\"$instance\", job=~\"$job\"}", + "format": "time_series", + "instant": true, + "interval": "", + "legendFormat": "", + "metrics": [ + { + "id": "1", + "type": "count" + } + ], + "query": "", + "range": false, + "refId": "A", + "timeField": "@timestamp" + } + ], + "timeFrom": "1m", + "title": "CPUs", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 4, + "x": 0, + "y": 9 + }, + "hideTimeOverride": true, + "id": 30, + "interval": "1m", + "maxDataPoints": 1, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "alias": "", + "bucketAggs": [ + { + "field": "@timestamp", + "id": "2", + "settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(r730_idrac_idrac_storage_drive_info{instance=\"$instance\", job=~\"$job\"})", + "format": "time_series", + "instant": true, + "interval": "", + "legendFormat": "", + "metrics": [ + { + "id": "1", + "type": "count" + } + ], + "query": "", + "range": false, + "refId": "A", + "timeField": "@timestamp" + } + ], + "timeFrom": "1m", + "title": "Disks", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "index": 0, + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 4, + "x": 4, + "y": 9 + }, + "hideTimeOverride": true, + "id": 25, + "interval": "1m", + "maxDataPoints": 1, + "options": { + "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "horizontal", @@ -477,131 +1205,552 @@ "values": false }, "showPercentChange": false, - "textMode": "auto", + "text": {}, + "textMode": "name", "wideLayout": true }, "pluginVersion": "11.1.0", + "targets": [ + { + "alias": "", + "bucketAggs": [ + { + "field": "@timestamp", + "id": "2", + "settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "r730_idrac_idrac_system_indicator_led_on{instance=\"$instance\", job=~\"$job\"}", + "format": "time_series", + "instant": true, + "legendFormat": "{{state}}", + "metrics": [ + { + "id": "1", + "type": "count" + } + ], + "query": "", + "range": false, + "refId": "A", + "timeField": "@timestamp" + } + ], + "timeFrom": "1m", + "title": "Indicator LED", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "Off" + }, + "1": { + "color": "green", + "index": 0, + "text": "On" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 4, + "x": 8, + "y": 9 + }, + "hideTimeOverride": true, + "id": 9, + "interval": "1m", + "maxDataPoints": 1, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "alias": "", + "bucketAggs": [ + { + "field": "@timestamp", + "id": "2", + "settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "r730_idrac_idrac_system_power_on{instance=\"$instance\", job=~\"$job\"}", + "instant": true, + "legendFormat": "", + "metrics": [ + { + "id": "1", + "type": "count" + } + ], + "query": "", + "range": false, + "refId": "A", + "timeField": "@timestamp" + } + ], + "timeFrom": "1m", + "title": "Power Status", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Capacity" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "status" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "type": "color-text" + } + }, + { + "id": "mappings", + "value": [ + { + "options": { + "Critical": { + "color": "light-red", + "index": 2 + }, + "OK": { + "color": "light-green", + "index": 0 + }, + "Warning": { + "color": "light-yellow", + "index": 1 + } + }, + "type": "value" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "protocol" + }, + "properties": [ + { + "id": "custom.width", + "value": 81 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "mediatype" + }, + "properties": [ + { + "id": "custom.width", + "value": 94 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "manufacturer" + }, + "properties": [ + { + "id": "custom.width", + "value": 120 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "slot" + }, + "properties": [ + { + "id": "custom.width", + "value": 84 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "serial" + }, + "properties": [ + { + "id": "custom.width", + "value": 211 + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 31, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": false, + "displayName": "slot" + } + ] + }, + "pluginVersion": "11.1.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(r730_idrac_redfish_system_storage_volume_capacity)", - "groupBy": [], - "interval": "", - "legendFormat": "", - "measurement": "idrac-hosts", - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"power-state\" FROM \"esxi-01-idrac\" WHERE $timeFilter", + "expr": "r730_idrac_idrac_storage_drive_info{job=~\"$job\", instance=~\"$instance\"}", + "format": "table", + "instant": false, "range": true, - "rawQuery": false, - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "storage-status" - ], - "type": "field" - } - ] - ], - "tags": [ - { - "key": "system-name", - "operator": "=~", - "value": "/^$idrac_host$/" - } - ] + "refId": "INFO" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "r730_idrac_idrac_storage_drive_health{job=~\"$job\", instance=~\"$instance\"}", + "format": "table", + "hide": false, + "instant": false, + "range": true, + "refId": "HEALTH" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "r730_idrac_idrac_storage_drive_capacity_bytes{job=~\"$job\", instance=~\"$instance\"}", + "format": "table", + "hide": false, + "instant": false, + "range": true, + "refId": "CAPACITY" } ], - "title": "Total Storage", - "type": "stat" + "title": "Disk Drives", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "id", + "mode": "outer" + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "pattern": "(manufacturer|mediatype|model|protocol|serial|slot|status|Value #CAPACITY)" + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Value #CAPACITY": 6, + "manufacturer": 1, + "mediatype": 2, + "model": 3, + "protocol": 4, + "serial": 5, + "slot": 0, + "status": 7 + }, + "renameByName": { + "Value #CAPACITY": "Capacity" + } + } + } + ], + "type": "table" }, { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "thresholds" }, "custom": { - "axisBorderShow": true, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMax": 200, - "axisSoftMin": 100, - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false + "align": "auto", + "cellOptions": { + "type": "auto" }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } + "inspect": false }, - "displayName": "${__field.name:wrap}", - "fieldMinMax": true, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { - "color": "transparent", + "color": "green", "value": null }, - { - "color": "orange", - "value": 1600 - }, { "color": "red", - "value": 1974 + "value": 80 } ] - }, - "unit": "watt" + } }, "overrides": [ { "matcher": { "id": "byName", - "options": "System Watts" + "options": "Link" }, "properties": [ { - "id": "color", + "id": "custom.cellOptions", "value": { - "fixedColor": "rgb(91, 151, 235)", - "mode": "fixed" + "type": "color-text" } + }, + { + "id": "mappings", + "value": [ + { + "options": { + "Down": { + "color": "light-yellow", + "index": 1 + }, + "Up": { + "color": "light-green", + "index": 0 + } + }, + "type": "value" + } + ] + }, + { + "id": "custom.width", + "value": 231 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #SPEED" + }, + "properties": [ + { + "id": "unit", + "value": "Mbits" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Speed" + }, + "properties": [ + { + "id": "custom.width", + "value": 102 + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*status.*" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "type": "color-text" + } + }, + { + "id": "mappings", + "value": [ + { + "options": { + "Critical": { + "color": "light-red", + "index": 2 + }, + "OK": { + "color": "light-green", + "index": 0 + }, + "Warning": { + "color": "light-yellow", + "index": 1 + } + }, + "type": "value" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Port status" + }, + "properties": [ + { + "id": "custom.width", + "value": 278 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Port/Interface" + }, + "properties": [ + { + "id": "custom.width", + "value": 203 } ] } @@ -611,279 +1760,142 @@ "h": 9, "w": 12, "x": 12, - "y": 1 + "y": 11 }, - "id": 147, + "id": 34, "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max", - "min" + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" ], - "displayMode": "list", - "placement": "bottom", - "showLegend": false + "show": false }, - "tooltip": { - "hoverProximity": 10, - "mode": "single", - "sort": "none" - } + "showHeader": true, + "sortBy": [] }, - "pluginVersion": "7.3.5", + "pluginVersion": "11.1.0", "targets": [ { - "alias": "System Watts", "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "editorMode": "code", - "expr": "r730_idrac_redfish_chassis_power_average_consumed_watts", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "none" - ], - "type": "fill" - } - ], - "interval": "", - "legendFormat": "", - "measurement": "idrac-hosts", - "orderByTime": "ASC", - "policy": "default", + "expr": "r730_idrac_idrac_network_port_health{job=~\"$job\", instance=~\"$instance\"}", + "format": "table", + "instant": false, "range": true, - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "system-watts" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [ - { - "key": "system-name", - "operator": "=~", - "value": "/^$idrac_host$/" - } - ] + "refId": "PORT_HEALTH" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "r730_idrac_idrac_network_port_link_up{job=~\"$job\", instance=~\"$instance\"}", + "format": "table", + "hide": false, + "instant": false, + "range": true, + "refId": "LINK_UP" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "r730_idrac_idrac_network_port_current_speed_mbps{job=~\"$job\", instance=~\"$instance\"}", + "format": "table", + "hide": false, + "instant": false, + "range": true, + "refId": "SPEED" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "r730_idrac_idrac_network_adapter_health{job=~\"$job\", instance=~\"$instance\"}", + "format": "table", + "hide": false, + "instant": false, + "range": true, + "refId": "INTERFACE_HEALTH" } ], - "title": "System Watts", - "transparent": true, - "type": "timeseries" + "title": "Network", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "id", + "mode": "outer" + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "pattern": "(id|name|status.+|status|Value #SPEED|Value #HEALTH|Value #LINK_UP|Value #PORT_HEALTH|Value #INTERFACE_HEALTH)" + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Value #INTERFACE_HEALTH": true, + "Value #LINK_UP": true, + "Value #PORT_HEALTH": true + }, + "indexByName": { + "Value #INTERFACE_HEALTH": 7, + "Value #LINK_UP": 6, + "Value #PORT_HEALTH": 5, + "Value #SPEED": 1, + "id": 0, + "status 1": 3, + "status 2": 2, + "status 3": 4 + }, + "renameByName": { + "Value #CAPACITY": "Capacity", + "Value #PORT_HEALTH": "", + "Value #SPEED": "Speed", + "id": "Port/Interface", + "status 1": "Port status", + "status 2": "Link", + "status 3": "Interface status" + } + } + } + ], + "type": "table" }, { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": true, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "decimals": 1, - "displayName": "${__field.name:wrap}", - "fieldMinMax": true, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "transparent", - "value": null - }, - { - "color": "orange", - "value": 70 - }, - { - "color": "red", - "value": 84 - } - ] - }, - "unit": "celsius" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "CPU 1 Temp" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "rgb(91, 151, 235)", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "CPU 2 Temp" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "rgb(90, 161, 80)", - "mode": "fixed" - } - } - ] - } - ] - }, + "collapsed": false, "gridPos": { - "h": 7, - "w": 12, + "h": 1, + "w": 24, "x": 0, - "y": 3 + "y": 20 }, - "id": 35, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "list", - "placement": "bottom", - "showLegend": false - }, - "tooltip": { - "hoverProximity": 10, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "7.3.5", - "targets": [ - { - "alias": "CPU 1 Temp", - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "r730_idrac_redfish_chassis_temperature_celsius{sensor=\"CPU1 Temp\"}", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "none" - ], - "type": "fill" - } - ], - "interval": "", - "legendFormat": "", - "measurement": "idrac-hosts", - "orderByTime": "ASC", - "policy": "default", - "range": true, - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "cpu1-temp" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - }, - { - "params": [ - " / 10" - ], - "type": "math" - } - ] - ], - "tags": [ - { - "key": "system-name", - "operator": "=~", - "value": "/^$idrac_host$/" - } - ] - } - ], - "title": "CPU Temp", - "type": "timeseries" + "id": 12, + "panels": [], + "title": "Sensors", + "type": "row" }, { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "fieldConfig": { "defaults": { @@ -891,1133 +1903,1010 @@ "mode": "palette-classic" }, "custom": { - "axisBorderShow": true, + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", + "fillOpacity": 10, + "gradientMode": "opacity", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, - "insertNulls": false, - "lineInterpolation": "linear", + "insertNulls": 3600000, + "lineInterpolation": "smooth", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", - "spanNulls": false, + "spanNulls": true, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { - "mode": "line" - } - }, - "decimals": 1, - "displayName": "${__field.name:wrap}", - "fieldMinMax": true, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "transparent", - "value": null - }, - { - "color": "orange", - "value": 42 - }, - { - "color": "red", - "value": 47 - } - ] - }, - "unit": "celsius" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Exhaust Temp" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "rgb(90, 161, 80)", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inlet Temp" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "rgb(91, 151, 235)", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 10 - }, - "id": 52, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "list", - "placement": "bottom", - "showLegend": false - }, - "tooltip": { - "hoverProximity": 10, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "7.3.5", - "targets": [ - { - "alias": "Inlet Temp", - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "r730_idrac_redfish_chassis_temperature_celsius{sensor=\"System Board Inlet Temp\"}", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "none" - ], - "type": "fill" - } - ], - "interval": "", - "legendFormat": "", - "measurement": "idrac-hosts", - "orderByTime": "ASC", - "policy": "default", - "range": true, - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "inlet-temp" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - }, - { - "params": [ - " / 10" - ], - "type": "math" - } - ] - ], - "tags": [ - { - "key": "system-name", - "operator": "=~", - "value": "/^$idrac_host$/" - } - ] - } - ], - "title": "Air Temp", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": true, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line+area" - } - }, - "fieldMinMax": true, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "transparent", - "value": null - }, - { - "color": "orange", - "value": 1600 - }, - { - "color": "red", - "value": 1974 - } - ] - }, - "unit": "volt" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "System Watts" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "rgb(91, 151, 235)", - "mode": "fixed" - } - } - ] - }, - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "r730_idrac_redfish_chassis_power_voltage_volts{chassis_id=\"System.Embedded.1\", instance=\"192.168.1.4\", job=\"redfish-idrac\", power_voltage=\"PS2 Voltage 2\", power_voltage_id=\"iDRAC.Embedded.1#PS2Voltage\", resource=\"power_voltage\"}" - ], - "prefix": "All except:", - "readOnly": true - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": false, - "tooltip": false, - "viz": true - } - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 10 - }, - "id": 652, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max", - "min" - ], - "displayMode": "list", - "placement": "bottom", - "showLegend": false - }, - "tooltip": { - "hoverProximity": 10, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "7.3.5", - "targets": [ - { - "alias": "System Watts", - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "r730_idrac_redfish_chassis_power_voltage_volts{power_voltage=~\"PS2 Voltage.*\"}", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "none" - ], - "type": "fill" - } - ], - "interval": "", - "legendFormat": "", - "measurement": "idrac-hosts", - "orderByTime": "ASC", - "policy": "default", - "range": true, - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "system-watts" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [ - { - "key": "system-name", - "operator": "=~", - "value": "/^$idrac_host$/" - } - ] - } - ], - "title": "Input Voltage", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": true, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "decimals": 1, - "displayName": "${__field.name:wrap}", - "fieldMinMax": true, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "transparent", - "value": null - } - ] - }, - "unit": "celsius" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Exhaust Temp" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "rgb(90, 161, 80)", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inlet Temp" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "rgb(91, 151, 235)", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 17 - }, - "id": 651, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "list", - "placement": "bottom", - "showLegend": false - }, - "tooltip": { - "hoverProximity": 10, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "7.3.5", - "targets": [ - { - "alias": "Inlet Temp", - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "r730_idrac_redfish_chassis_temperature_celsius{sensor=\"System Board Exhaust Temp\"}", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "none" - ], - "type": "fill" - } - ], - "interval": "", - "legendFormat": "", - "measurement": "idrac-hosts", - "orderByTime": "ASC", - "policy": "default", - "range": true, - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "inlet-temp" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - }, - { - "params": [ - " / 10" - ], - "type": "math" - } - ] - ], - "tags": [ - { - "key": "system-name", - "operator": "=~", - "value": "/^$idrac_host$/" - } - ] - } - ], - "title": "System Board Exhaust Temp", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": true, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMax": 240, - "axisSoftMin": 220, - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line+area" - } - }, - "displayName": "${__field.name:wrap}", - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "transparent", - "value": null - }, - { - "color": "orange", - "value": 1600 - }, - { - "color": "red", - "value": 1974 - } - ] - }, - "unit": "volt" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "System Watts" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "rgb(91, 151, 235)", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 17 - }, - "id": 653, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max", - "min" - ], - "displayMode": "list", - "placement": "bottom", - "showLegend": false - }, - "tooltip": { - "hoverProximity": 10, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "7.3.5", - "targets": [ - { - "alias": "System Watts", - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "r730_idrac_redfish_chassis_power_voltage_volts{power_voltage=~\"PS1 Voltage.*\"}", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "none" - ], - "type": "fill" - } - ], - "interval": "", - "legendFormat": "", - "measurement": "idrac-hosts", - "orderByTime": "ASC", - "policy": "default", - "range": true, - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "system-watts" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [ - { - "key": "system-name", - "operator": "=~", - "value": "/^$idrac_host$/" - } - ] - } - ], - "title": "Input Voltage (UPS power supply)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "green", - "mode": "fixed" - }, - "custom": { - "axisBorderShow": true, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "RPM", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line+area" + "mode": "off" } }, "decimals": 0, - "displayName": "${__field.name:wrap}", - "fieldMinMax": true, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { - "color": "red", + "color": "green", "value": null - }, - { - "color": "orange", - "value": 720 - }, - { - "color": "transparent", - "value": 960 } ] }, - "unit": "none" + "unit": "celsius" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 9, "w": 12, "x": 0, - "y": 24 + "y": 21 }, - "id": 70, + "id": 14, "options": { "legend": { - "calcs": [], + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "alias": "", + "bucketAggs": [ + { + "field": "@timestamp", + "id": "2", + "settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "avg_over_time(r730_idrac_idrac_sensors_temperature{instance=~\"$instance\",job=~\"$job\"}[$__rate_interval])", + "instant": false, + "legendFormat": "{{name}}", + "metrics": [ + { + "id": "1", + "type": "count" + } + ], + "query": "", + "range": true, + "refId": "A", + "timeField": "@timestamp" + } + ], + "title": "Temperature", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": 3600000, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": 3600000, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "rotrpm" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 21 + }, + "id": 15, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "alias": "", + "bucketAggs": [ + { + "field": "@timestamp", + "id": "2", + "settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "avg_over_time(r730_idrac_idrac_sensors_fan_speed{instance=~\"$instance\",job=~\"$job\"}[$__rate_interval])", + "instant": false, + "legendFormat": "{{name}}", + "metrics": [ + { + "id": "1", + "type": "count" + } + ], + "query": "", + "range": true, + "refId": "A", + "timeField": "@timestamp" + } + ], + "title": "Fans", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 17, + "panels": [], + "title": "Power", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": true, + "axisColorMode": "text", + "axisLabel": "In/Out", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": 3600000, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": 3600000, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "watt" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": ".*Input.*" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 11, + "x": 0, + "y": 31 + }, + "id": 19, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "alias": "", + "bucketAggs": [ + { + "field": "@timestamp", + "id": "2", + "settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "avg_over_time(r730_idrac_idrac_power_supply_output_watts{instance=~\"$instance\",job=~\"$job\"}[$__rate_interval])", + "legendFormat": "Output PSU-{{id}}", + "metrics": [ + { + "id": "1", + "type": "count" + } + ], + "query": "", + "range": true, + "refId": "A", + "timeField": "@timestamp" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "avg_over_time(r730_idrac_idrac_power_supply_input_watts{instance=~\"$instance\",job=~\"$job\"}[$__rate_interval])", + "hide": false, + "legendFormat": "Input PSU-{{id}}", + "range": true, + "refId": "B" + } + ], + "title": "Power Supplies", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "volt" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 11, + "y": 31 + }, + "hideTimeOverride": true, + "id": 23, + "interval": "1m", + "maxDataPoints": 1, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "alias": "", + "bucketAggs": [ + { + "field": "@timestamp", + "id": "2", + "settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "avg(r730_idrac_idrac_power_supply_input_voltage{instance=~\"$instance\", job=~\"$job\"})", + "format": "time_series", + "instant": true, + "interval": "", + "legendFormat": "", + "metrics": [ + { + "id": "1", + "type": "count" + } + ], + "query": "", + "range": false, + "refId": "A", + "timeField": "@timestamp" + } + ], + "timeFrom": "1m", + "title": "Voltage", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": 3600000, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": 3600000, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "watt" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 11, + "x": 13, + "y": 31 + }, + "id": 22, + "options": { + "legend": { + "calcs": [ + "mean" + ], "displayMode": "list", - "placement": "bottom", + "placement": "right", "showLegend": false }, "tooltip": { - "hoverProximity": 10, - "mode": "single", - "sort": "none" + "maxHeight": 600, + "mode": "multi", + "sort": "desc" } }, - "pluginVersion": "7.3.5", "targets": [ { - "alias": "Fan 1", + "alias": "", + "bucketAggs": [ + { + "field": "@timestamp", + "id": "2", + "settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "editorMode": "code", - "expr": "avg(r730_idrac_redfish_chassis_fan_rpm)", - "groupBy": [ + "expr": "avg_over_time(r730_idrac_idrac_power_control_avg_consumed_watts{instance=~\"$instance\",job=~\"$job\"}[$__rate_interval])", + "legendFormat": "{{name}}", + "metrics": [ { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "none" - ], - "type": "fill" + "id": "1", + "type": "count" } ], - "interval": "", - "legendFormat": "", - "measurement": "idrac-hosts", - "orderByTime": "ASC", - "policy": "default", + "query": "", "range": true, "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "fan1-speed" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [ - { - "key": "system-name", - "operator": "=~", - "value": "/^$idrac_host$/" - } - ] - }, - { - "alias": "Fan 2", - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "none" - ], - "type": "fill" - } - ], - "measurement": "idrac-hosts", - "orderByTime": "ASC", - "policy": "default", - "refId": "B", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "fan2-speed" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [ - { - "key": "system-name", - "operator": "=~", - "value": "/^$idrac_host$/" - } - ] - }, - { - "alias": "Fan 3", - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "none" - ], - "type": "fill" - } - ], - "measurement": "idrac-hosts", - "orderByTime": "ASC", - "policy": "default", - "refId": "C", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "fan3-speed" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [ - { - "key": "system-name", - "operator": "=~", - "value": "/^$idrac_host$/" - } - ] - }, - { - "alias": "Fan 4", - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "none" - ], - "type": "fill" - } - ], - "measurement": "idrac-hosts", - "orderByTime": "ASC", - "policy": "default", - "refId": "D", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "fan4-speed" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [ - { - "key": "system-name", - "operator": "=~", - "value": "/^$idrac_host$/" - } - ] - }, - { - "alias": "Fan 5", - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "none" - ], - "type": "fill" - } - ], - "measurement": "idrac-hosts", - "orderByTime": "ASC", - "policy": "default", - "refId": "E", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "fan5-speed" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [ - { - "key": "system-name", - "operator": "=~", - "value": "/^$idrac_host$/" - } - ] - }, - { - "alias": "Fan 6", - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "none" - ], - "type": "fill" - } - ], - "measurement": "idrac-hosts", - "orderByTime": "ASC", - "policy": "default", - "refId": "F", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "fan6-speed" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [ - { - "key": "system-name", - "operator": "=~", - "value": "/^$idrac_host$/" - } - ] + "timeField": "@timestamp" } ], - "title": "Average Fan Speed", + "title": "Power Consumption", "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "watt" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 11, + "y": 35 + }, + "hideTimeOverride": true, + "id": 24, + "interval": "1m", + "maxDataPoints": 1, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "alias": "", + "bucketAggs": [ + { + "field": "@timestamp", + "id": "2", + "settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "r730_idrac_idrac_power_control_capacity_watts{id=\"0\",instance=~\"$instance\", job=~\"$job\"}", + "format": "time_series", + "instant": true, + "interval": "", + "legendFormat": "", + "metrics": [ + { + "id": "1", + "type": "count" + } + ], + "query": "", + "range": false, + "refId": "A", + "timeField": "@timestamp" + } + ], + "timeFrom": "1m", + "title": "Capacity", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 39 + }, + "id": 29, + "panels": [], + "title": "Event log", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": false, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "unit", + "value": "dateTimeAsSystem" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "id" + }, + "properties": [ + { + "id": "custom.width", + "value": 52 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "custom.width", + "value": 168 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "severity" + }, + "properties": [ + { + "id": "custom.width", + "value": 91 + }, + { + "id": "mappings", + "value": [ + { + "options": { + "Critical": { + "color": "red", + "index": 0 + }, + "OK": { + "color": "green", + "index": 2 + }, + "Warning": { + "color": "yellow", + "index": 1 + } + }, + "type": "value" + } + ] + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-text" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 27, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "last_over_time(r730_idrac_idrac_events_log_entry{job=~\"$job\", instance=~\"$instance\"}[$__interval] offset -$__interval) * 1000 < ${__to}\nand \nlast_over_time(r730_idrac_idrac_events_log_entry{job=~\"$job\", instance=~\"$instance\"}[$__interval] offset -$__interval) * 1000 > ${__from}", + "format": "table", + "instant": false, + "interval": "1", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "System Event Log (SEL)", + "transformations": [ + { + "id": "convertFieldType", + "options": { + "conversions": [ + { + "destinationType": "number", + "targetField": "id" + } + ], + "fields": {} + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "desc": true, + "field": "Time" + } + ] + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "id", + "message", + "severity", + "Value" + ], + "pattern": "message|id|severity|Value" + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Value": 0, + "id": 2, + "instance": 1, + "message": 4, + "severity": 3 + }, + "renameByName": { + "Value": "Time" + } + } + }, + { + "id": "groupBy", + "options": { + "fields": { + "Time": { + "aggregations": [], + "operation": "groupby" + }, + "id": { + "aggregations": [], + "operation": "groupby" + }, + "message": { + "aggregations": [], + "operation": "groupby" + }, + "severity": { + "aggregations": [], + "operation": "groupby" + } + } + } + } + ], + "type": "table" } ], - "refresh": "1m", + "refresh": "auto", "schemaVersion": 39, "tags": [ + "dell", "idrac", - "telegraf", - "snmp" + "lenovo", + "redfish" ], "templating": { - "list": [] - }, - "time": { - "from": "now-3h", - "to": "now" - }, - "timepicker": { - "hidden": false, - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "PBFA97CFB590B2093" + }, + "hide": 0, + "includeAll": false, + "label": "Datasource", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": ".+", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": "", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(r730_idrac_idrac_system_machine_info,job)", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [], + "query": "label_values(r730_idrac_idrac_system_machine_info,job)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "current": { + "selected": false, + "text": "192.168.1.4", + "value": "192.168.1.4" + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(r730_idrac_idrac_system_machine_info{job=~\"$job\"},instance)", + "hide": 0, + "includeAll": false, + "label": "", + "multi": false, + "name": "instance", + "options": [], + "query": "label_values(r730_idrac_idrac_system_machine_info{job=~\"$job\"},instance)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 3, + "type": "query" + } ] }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, "timezone": "", - "title": "iDRAC - Host Stats", - "uid": "O19gr0jZk", - "version": 24, + "title": "IDRAC", + "uid": "YVz226S4z", + "version": 3, "weekStart": "" } diff --git a/modules/kubernetes/monitoring/dashboards/nvidia.json b/modules/kubernetes/monitoring/dashboards/nvidia.json new file mode 100644 index 00000000..27ca72ea --- /dev/null +++ b/modules/kubernetes/monitoring/dashboards/nvidia.json @@ -0,0 +1,710 @@ +{ + "annotations": { + "list": [ + { + "$$hashKey": "object:192", + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "This dashboard is to display the metrics from DCGM Exporter on a Kubernetes (1.13+) cluster", + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 12239, + "graphTooltip": 0, + "id": 26, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "celsius" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 18, + "x": 0, + "y": 0 + }, + "id": 12, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "nvidia_tesla_t4_DCGM_FI_DEV_GPU_TEMP", + "instant": false, + "interval": "", + "legendFormat": "GPU 0", + "refId": "A" + } + ], + "title": "GPU Temperature", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 70 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "celsius" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 0 + }, + "id": 14, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "nvidia_tesla_t4_DCGM_FI_DEV_GPU_TEMP", + "interval": "", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "GPU Current Temp", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "watt" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 18, + "x": 0, + "y": 8 + }, + "id": 10, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "6.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "nvidia_tesla_t4_DCGM_FI_DEV_POWER_USAGE", + "interval": "", + "legendFormat": "GPU {{gpu}}", + "range": true, + "refId": "A" + } + ], + "title": "GPU Power Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 2400, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 1800 + }, + { + "color": "red", + "value": 2200 + } + ] + }, + "unit": "watt" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 8 + }, + "id": 16, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "sum" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(nvidia_tesla_t4_DCGM_FI_DEV_POWER_USAGE)", + "interval": "", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "GPU Power Total", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "nvidia_tesla_t4_DCGM_FI_DEV_GPU_UTIL", + "interval": "", + "legendFormat": "GPU {{gpu}}", + "range": true, + "refId": "A" + } + ], + "title": "GPU Utilization", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decmbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 18, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "nvidia_tesla_t4_DCGM_FI_DEV_FB_USED", + "interval": "", + "legendFormat": "GPU {{gpu}}", + "range": true, + "refId": "A" + } + ], + "title": "GPU Framebuffer Mem Used", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "hertz" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 2, + "interval": "", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "nvidia_tesla_t4_DCGM_FI_DEV_SM_CLOCK* 1000000", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "GPU {{gpu}}", + "range": true, + "refId": "A" + } + ], + "title": "GPU SM Clocks", + "type": "timeseries" + } + ], + "refresh": "auto", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "NVIDIA DCGM Exporter Dashboard", + "uid": "Oxed_c6Wz", + "version": 7, + "weekStart": "" +} diff --git a/modules/kubernetes/monitoring/main.tf b/modules/kubernetes/monitoring/main.tf index 95d56c14..b2ef7ae1 100644 --- a/modules/kubernetes/monitoring/main.tf +++ b/modules/kubernetes/monitoring/main.tf @@ -333,6 +333,8 @@ resource "kubernetes_config_map" "redfish-config" { } data = { "config.yml" = <<-EOF + address: 0.0.0.0 + port: 9610 hosts: ${var.idrac_host}: username: ${var.idrac_username} @@ -340,10 +342,8 @@ resource "kubernetes_config_map" "redfish-config" { default: username: root password: calvin - groups: - group1: - username: user - password: pass + metrics: + all: true EOF } } @@ -374,20 +374,17 @@ resource "kubernetes_deployment" "idrac-redfish" { } spec { container { - image = "viktorbarzin/redfish-exporter:latest" + # https://github.com/mrlhansen/idrac_exporter?tab=readme-ov-file + image = "ghcr.io/mrlhansen/idrac_exporter:latest" name = "redfish-exporter" - # command = ["/bin/sh", "-c", "redfish-exporter --config.file /app/config.yml"] - # command = ["/usr/local/bin/redfish_exporter", "--config.file", "/etc/prometheus/redfish_exporter.yml"] - command = ["/usr/local/bin/redfish_exporter", "--config.file", "/app/config.yml"] port { container_port = 9610 } volume_mount { name = "redfish-exporter-config" - mount_path = "/app/config.yml" - # mount_path = "/etc/prometheus/redfish_exporter.yml" - sub_path = "config.yml" + mount_path = "/etc/prometheus/idrac.yml" + sub_path = "config.yml" } } volume { diff --git a/modules/kubernetes/monitoring/prometheus_chart_values.tpl b/modules/kubernetes/monitoring/prometheus_chart_values.tpl index f5805d13..d1541383 100644 --- a/modules/kubernetes/monitoring/prometheus_chart_values.tpl +++ b/modules/kubernetes/monitoring/prometheus_chart_values.tpl @@ -81,7 +81,7 @@ server: # enabled: false existingClaim: prometheus-iscsi-pvc # storageClass: rook-cephfs - retention: "12w" + retention: "52w" strategy: type: Recreate baseURL: "https://prometheus.viktorbarzin.me" @@ -222,8 +222,15 @@ serverFiles: severity: page annotations: summary: Power voltage on a power supply is {{ $value }} indicating power outage. + - alert: HighGPUTemp + expr: nvidia_tesla_t4_DCGM_FI_DEV_GPU_TEMP > 65 + for: 1m + labels: + severity: page + annotations: + summary: "High GPU Temperature {{$value}}" - alert: HighPowerUsage - expr: (max_over_time(r730_idrac_redfish_chassis_power_average_consumed_watts[20m])) > 180 + expr: r730_idrac_idrac_power_control_consumed_watts > 200 for: 60m labels: severity: page @@ -237,7 +244,7 @@ serverFiles: annotations: summary: No node load data. Can signal that prometheus is not scraping - alert: NoiDRACData - expr: (max(r730_idrac_redfish_chassis_power_average_consumed_watts) or on() vector(0)) == 0 + expr: (max(r730_idrac_idrac_system_health + 1) or on() vector(0)) == 0 for: 30m labels: severity: page @@ -359,15 +366,13 @@ extraScrapeConfigs: | - targets: - "idrac.viktorbarzin.lan:161" metrics_path: '/snmp' - params: - module: [dell_idrac] relabel_configs: - source_labels: [__address__] target_label: __param_target - source_labels: [__param_target] target_label: instance - target_label: __address__ - replacement: 'prometheus-snmp-exporter.monitoring.svc.cluster.local:9116' + replacement: 'snmp-exporter.monitoring.svc.cluster.local:9116' metric_relabel_configs: - source_labels: [ __name__ ] target_label: '__name__' @@ -375,9 +380,9 @@ extraScrapeConfigs: | regex: '(.*)' replacement: 'r730_idrac_$${1}' - job_name: 'redfish-idrac' - scrape_interval: 5m - scrape_timeout: 4m - metrics_path: /redfish + scrape_interval: 3m + scrape_timeout: 1m + metrics_path: /metrics static_configs: - targets: - 192.168.1.4 @@ -492,3 +497,15 @@ extraScrapeConfigs: | - "ha-sofia.viktorbarzin.lan:8123" metrics_path: '/api/prometheus' bearer_token: "${haos_api_token}" + - job_name: 'nvidia' + static_configs: + - targets: + - "nvidia-exporter.nvidia.svc.cluster.local" + metrics_path: '/metrics' + metric_relabel_configs: + - source_labels: [ __name__ ] + target_label: '__name__' + action: replace + regex: '(.*)' + replacement: 'nvidia_tesla_t4_$${1}' +