diff --git a/modules/kubernetes/monitoring/dashboards/cluster_health.json b/modules/kubernetes/monitoring/dashboards/cluster_health.json index dcc123b3..971d99f7 100644 --- a/modules/kubernetes/monitoring/dashboards/cluster_health.json +++ b/modules/kubernetes/monitoring/dashboards/cluster_health.json @@ -15,337 +15,1055 @@ } ] }, - "description": "Single-pane-of-glass daily health overview — nodes, pods, quotas, storage, certs, GPU, power", + "description": "Single-pane-of-glass daily health overview \u2014 nodes, pods, quotas, storage, certs, GPU, power", "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, "id": null, - "links": [], + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": false, + "keepTime": true, + "tags": [], + "targetBlank": true, + "title": "Dashboards", + "type": "dashboards", + "url": "" + }, + { + "title": "API Server", + "type": "link", + "url": "/d/k8s_system_apisrv", + "icon": "cloud", + "targetBlank": true, + "keepTime": true, + "tooltip": "Kubernetes API Server" + }, + { + "title": "Nodes", + "type": "link", + "url": "/d/k8s_views_nodes", + "icon": "dashboard", + "targetBlank": true, + "keepTime": true, + "tooltip": "Kubernetes Nodes View" + }, + { + "title": "Pods", + "type": "link", + "url": "/d/k8s_views_pods", + "icon": "dashboard", + "targetBlank": true, + "keepTime": true, + "tooltip": "Kubernetes Pods View" + }, + { + "title": "GPU", + "type": "link", + "url": "/d/Oxed_c6Wz", + "icon": "bolt", + "targetBlank": true, + "keepTime": true, + "tooltip": "NVIDIA DCGM Exporter" + }, + { + "title": "iDRAC", + "type": "link", + "url": "/d/YVz226S4z", + "icon": "server", + "targetBlank": true, + "keepTime": true, + "tooltip": "Dell iDRAC Hardware" + }, + { + "title": "UPS", + "type": "link", + "url": "/d/ee70yskqw5u68f", + "icon": "battery-full", + "targetBlank": true, + "keepTime": true, + "tooltip": "Huawei UPS 2000" + }, + { + "title": "CoreDNS", + "type": "link", + "url": "/d/wY4blRMGz", + "icon": "signal", + "targetBlank": true, + "keepTime": true, + "tooltip": "CoreDNS" + }, + { + "title": "Node Exporter", + "type": "link", + "url": "/d/rYdddlPWk", + "icon": "monitor", + "targetBlank": true, + "keepTime": true, + "tooltip": "Node Exporter Full" + }, + { + "title": "Docker Registry", + "type": "link", + "url": "/d/CoBSgj8iz", + "icon": "cube", + "targetBlank": true, + "keepTime": true, + "tooltip": "Docker Registry" + }, + { + "title": "Traefik", + "type": "link", + "url": "/d/n5bu_kv45", + "icon": "exchange-alt", + "targetBlank": true, + "keepTime": true, + "tooltip": "Traefik Ingress" + }, + { + "title": "Loki Logs", + "type": "link", + "url": "/d/o6-BGgnnk", + "icon": "file-alt", + "targetBlank": true, + "keepTime": true, + "tooltip": "Loki Kubernetes Logs" + }, + { + "title": "kube-state-metrics", + "type": "link", + "url": "/d/garysdevil-kube-state-metrics-v2", + "icon": "graph-bar", + "targetBlank": true, + "keepTime": true, + "tooltip": "kube-state-metrics v2" + }, + { + "title": "Proxmox", + "type": "link", + "url": "/d/rYdddlPW", + "icon": "server", + "targetBlank": true, + "keepTime": true, + "tooltip": "Proxmox Node Exporter" + } + ], "panels": [ { "collapsed": false, - "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, - "id": 1, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 70, "panels": [], - "title": "Cluster Summary", + "title": "Infrastructure", "type": "row" }, { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "red", "value": null }, - { "color": "orange", "value": 3 }, - { "color": "green", "value": 5 } + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 85 + } ] - } + }, + "unit": "celsius" }, "overrides": [] }, - "gridPos": { "h": 4, "w": 4, "x": 0, "y": 1 }, - "id": 2, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, - "textMode": "auto" + "gridPos": { + "h": 4, + "w": 3, + "x": 0, + "y": 1 }, - "title": "Nodes Ready", - "type": "stat", - "targets": [ - { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\"})", - "legendFormat": "Ready", - "refId": "A" - } - ] - }, - { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "fieldConfig": { - "defaults": { - "color": { "mode": "thresholds" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { "color": "blue", "value": null } - ] - } - }, - "overrides": [] - }, - "gridPos": { "h": 4, "w": 4, "x": 4, "y": 1 }, - "id": 3, + "id": 71, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "textMode": "auto" }, - "title": "Total Pods", + "title": "GPU Temp", "type": "stat", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "expr": "count(kube_pod_info)", - "legendFormat": "Pods", + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "nvidia_tesla_t4_DCGM_FI_DEV_GPU_TEMP", + "legendFormat": "GPU Temp", "refId": "A" } ] }, { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 60 + }, + { + "color": "red", + "value": 85 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 3, + "y": 1 + }, + "id": 72, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "title": "GPU Util %", + "type": "gauge", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "nvidia_tesla_t4_DCGM_FI_DEV_GPU_UTIL", + "legendFormat": "GPU Util", + "refId": "A" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 12000 + }, + { + "color": "red", + "value": 14000 + } + ] + }, + "unit": "decmbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 6, + "y": 1 + }, + "id": 73, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "title": "GPU VRAM Used", + "type": "stat", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "nvidia_tesla_t4_DCGM_FI_DEV_FB_USED", + "legendFormat": "VRAM", + "refId": "A" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 10 + }, + { + "color": "green", + "value": 30 + } + ] + }, + "unit": "m" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 9, + "y": 1 + }, + "id": 75, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "title": "UPS Battery (min)", + "type": "stat", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "ups_upsEstimatedMinutesRemaining", + "legendFormat": "Battery", + "refId": "A" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 30 + }, + { + "color": "green", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 12, + "y": 1 + }, + "id": 76, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "title": "UPS Charge %", + "type": "gauge", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "ups_upsEstimatedChargeRemaining", + "legendFormat": "Charge", + "refId": "A" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 85 + } + ] + }, + "unit": "celsius" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 15, + "y": 1 + }, + "id": 78, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "title": "CPU Temp", + "type": "stat", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "r730_idrac_idrac_sensors_temperature{name=\"CPU1 Temp\"}", + "legendFormat": "CPU Temp", + "refId": "A" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, "mappings": [ { - "options": { "0": { "color": "green", "text": "0" } }, + "options": { + "from": 1, + "to": 999999, + "result": { + "color": "red", + "text": "OUTAGE" + } + }, + "type": "range" + }, + { + "options": { + "0": { + "color": "green", + "text": "MAINS" + } + }, "type": "value" } ], "thresholds": { "mode": "absolute", "steps": [ - { "color": "green", "value": null }, - { "color": "red", "value": 1 } + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 18, + "y": 1 + }, + "id": 93, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "title": "Power Source", + "type": "stat", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "ups_upsSecondsOnBattery", + "legendFormat": "", + "refId": "A" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "green", + "text": "0" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } ] }, "noValue": "0" }, "overrides": [] }, - "gridPos": { "h": 4, "w": 4, "x": 8, "y": 1 }, - "id": 4, + "gridPos": { + "h": 4, + "w": 3, + "x": 21, + "y": 1 + }, + "id": 94, "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "textMode": "auto" }, - "title": "Firing Alerts", + "title": "UPS Alarms", "type": "stat", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "expr": "count(ALERTS{alertstate=\"firing\"}) OR vector(0)", - "legendFormat": "Firing", + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "ups_upsAlarmsPresent", + "legendFormat": "", "refId": "A" } ] }, { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, "mappings": [], - "max": 100, - "min": 0, "thresholds": { "mode": "absolute", "steps": [ - { "color": "green", "value": null }, - { "color": "orange", "value": 60 }, - { "color": "red", "value": 80 } + { + "color": "green", + "value": null + } ] }, - "unit": "percent" + "unit": "watt", + "min": 0 }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Solar Production" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FADE2A", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 20 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "CPU Temp" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "celsius" + }, + { + "id": "color", + "value": { + "fixedColor": "#FF9830", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + } + ] }, - "gridPos": { "h": 4, "w": 4, "x": 12, "y": 1 }, - "id": 5, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 77, "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, - "showThresholdLabels": false, - "showThresholdMarkers": true - }, - "title": "Cluster CPU %", - "type": "gauge", - "targets": [ - { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "expr": "avg(1 - rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100", - "legendFormat": "CPU", - "refId": "A" + "legend": { + "calcs": [ + "mean", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ] - }, - { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "fieldConfig": { - "defaults": { - "color": { "mode": "thresholds" }, - "mappings": [], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { "color": "green", "value": null }, - { "color": "orange", "value": 60 }, - { "color": "red", "value": 80 } - ] - }, - "unit": "percent" - }, - "overrides": [] }, - "gridPos": { "h": 4, "w": 4, "x": 16, "y": 1 }, - "id": 6, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, - "showThresholdLabels": false, - "showThresholdMarkers": true - }, - "title": "Cluster Memory %", - "type": "gauge", + "title": "Power Draw (24h)", + "type": "timeseries", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "expr": "(1 - sum(node_memory_MemAvailable_bytes) / sum(node_memory_MemTotal_bytes)) * 100", - "legendFormat": "Memory", - "refId": "A" - } - ] - }, - { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "fieldConfig": { - "defaults": { - "color": { "mode": "thresholds" }, - "mappings": [], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { "color": "green", "value": null }, - { "color": "orange", "value": 70 }, - { "color": "red", "value": 85 } - ] + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { "h": 4, "w": 4, "x": 20, "y": 1 }, - "id": 7, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, - "showThresholdLabels": false, - "showThresholdMarkers": true - }, - "title": "Cluster Disk %", - "type": "gauge", - "targets": [ - { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "expr": "(1 - sum(node_filesystem_avail_bytes{mountpoint=\"/\",fstype!~\"tmpfs\"}) / sum(node_filesystem_size_bytes{mountpoint=\"/\",fstype!~\"tmpfs\"})) * 100", - "legendFormat": "Disk", + "expr": "avg_over_time(r730_idrac_idrac_power_control_avg_consumed_watts[$__rate_interval])", + "legendFormat": "Consumed", "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "haos_sensor_power_w{entity=\"sensor.fv_b_pv_power\"}", + "legendFormat": "Solar Production", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "r730_idrac_idrac_sensors_temperature{name=\"CPU1 Temp\"}", + "legendFormat": "CPU Temp", + "refId": "C" } ] }, { "collapsed": false, - "gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 13 + }, "id": 10, "panels": [], "title": "Node Health", "type": "row" }, { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "custom": { "align": "auto", - "cellOptions": { "type": "auto" }, + "cellOptions": { + "type": "auto" + }, "inspect": false }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "green", "value": null }, - { "color": "orange", "value": 60 }, - { "color": "red", "value": 80 } + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 60 + }, + { + "color": "red", + "value": 80 + } ] } }, "overrides": [ { - "matcher": { "id": "byName", "options": "Node" }, - "properties": [{ "id": "custom.width", "value": 160 }] - }, - { - "matcher": { "id": "byName", "options": "CPU %" }, + "matcher": { + "id": "byName", + "options": "Node" + }, "properties": [ - { "id": "unit", "value": "percent" }, - { "id": "decimals", "value": 1 }, - { "id": "custom.cellOptions", "value": { "mode": "gradient", "type": "gauge" } } + { + "id": "custom.width", + "value": 160 + } ] }, { - "matcher": { "id": "byName", "options": "Memory %" }, + "matcher": { + "id": "byName", + "options": "CPU %" + }, "properties": [ - { "id": "unit", "value": "percent" }, - { "id": "decimals", "value": 1 }, - { "id": "custom.cellOptions", "value": { "mode": "gradient", "type": "gauge" } } + { + "id": "unit", + "value": "percent" + }, + { + "id": "decimals", + "value": 1 + }, + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "gauge" + } + } ] }, { - "matcher": { "id": "byName", "options": "Disk %" }, + "matcher": { + "id": "byName", + "options": "Memory %" + }, "properties": [ - { "id": "unit", "value": "percent" }, - { "id": "decimals", "value": 1 }, - { "id": "custom.cellOptions", "value": { "mode": "gradient", "type": "gauge" } } + { + "id": "unit", + "value": "percent" + }, + { + "id": "decimals", + "value": 1 + }, + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "gauge" + } + } ] }, { - "matcher": { "id": "byName", "options": "Pods" }, - "properties": [{ "id": "custom.width", "value": 60 }] + "matcher": { + "id": "byName", + "options": "Disk %" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + }, + { + "id": "decimals", + "value": 1 + }, + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "gauge" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Pods" + }, + "properties": [ + { + "id": "custom.width", + "value": 60 + } + ] } ] }, - "gridPos": { "h": 8, "w": 8, "x": 0, "y": 6 }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 14 + }, "id": 11, "options": { "cellHeight": "sm", - "footer": { "countRows": false, "fields": "", "reducer": ["sum"], "show": false }, + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, "showHeader": true, - "sortBy": [{ "desc": true, "displayName": "CPU %" }] + "sortBy": [ + { + "desc": true, + "displayName": "CPU %" + } + ] }, "title": "Node Resource Table", "type": "table", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "expr": "(1 - avg by(instance)(rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100", "format": "table", "instant": true, @@ -353,7 +1071,10 @@ "refId": "CPU" }, { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "expr": "(1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * 100", "format": "table", "instant": true, @@ -361,7 +1082,10 @@ "refId": "MEM" }, { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "expr": "(1 - node_filesystem_avail_bytes{mountpoint=\"/\",fstype!~\"tmpfs\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!~\"tmpfs\"}) * 100", "format": "table", "instant": true, @@ -369,7 +1093,10 @@ "refId": "DISK" }, { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "expr": "count by(node)(kube_pod_info)", "format": "table", "instant": true, @@ -385,7 +1112,9 @@ { "id": "organize", "options": { - "excludeByName": { "Time": true }, + "excludeByName": { + "Time": true + }, "renameByName": { "instance": "Node", "node": "Node", @@ -399,10 +1128,15 @@ ] }, { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "palette-classic" }, + "color": { + "mode": "palette-classic" + }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, @@ -414,23 +1148,40 @@ "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", - "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, - "scaleDistribution": { "type": "linear" }, + "scaleDistribution": { + "type": "linear" + }, "showPoints": "never", "spanNulls": false, - "stacking": { "group": "A", "mode": "none" }, - "thresholdsStyle": { "mode": "off" } + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "green", "value": null }, - { "color": "red", "value": 80 } + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } ] }, "unit": "percent", @@ -439,17 +1190,36 @@ }, "overrides": [] }, - "gridPos": { "h": 8, "w": 8, "x": 8, "y": 6 }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 14 + }, "id": 12, "options": { - "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, - "tooltip": { "mode": "multi", "sort": "desc" } + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } }, "title": "CPU per Node (24h)", "type": "timeseries", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "expr": "(1 - avg by(instance)(rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100", "legendFormat": "{{instance}}", "refId": "A" @@ -457,10 +1227,15 @@ ] }, { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "palette-classic" }, + "color": { + "mode": "palette-classic" + }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, @@ -472,23 +1247,40 @@ "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", - "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, - "scaleDistribution": { "type": "linear" }, + "scaleDistribution": { + "type": "linear" + }, "showPoints": "never", "spanNulls": false, - "stacking": { "group": "A", "mode": "none" }, - "thresholdsStyle": { "mode": "off" } + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "green", "value": null }, - { "color": "red", "value": 80 } + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } ] }, "unit": "percent", @@ -497,17 +1289,36 @@ }, "overrides": [] }, - "gridPos": { "h": 8, "w": 8, "x": 16, "y": 6 }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 14 + }, "id": 13, "options": { - "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, - "tooltip": { "mode": "multi", "sort": "desc" } + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } }, "title": "Memory per Node (24h)", "type": "timeseries", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "expr": "(1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * 100", "legendFormat": "{{instance}}", "refId": "A" @@ -516,64 +1327,1622 @@ }, { "collapsed": false, - "gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 }, - "id": 20, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 22 + }, + "id": 40, "panels": [], - "title": "Resource Governance — Quota Utilization", + "title": "Top Resource Consumers", "type": "row" }, { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 23 + }, + "id": 41, + "options": { + "displayMode": "gradient", + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "title": "Top 10 Namespaces by CPU", + "type": "bargauge", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "topk(10, sum by(namespace)(rate(container_cpu_usage_seconds_total{container!=\"\"}[5m])))", + "refId": "A", + "format": "table", + "instant": true + } + ], + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "__name__": true, + "container": true, + "endpoint": true, + "instance": true, + "job": true, + "service": true, + "uid": true, + "id": true, + "metrics_path": true, + "node": true + }, + "renameByName": {} + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "Value", + "desc": true + } + ] + } + }, + { + "id": "rowsToFields", + "options": {} + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 23 + }, + "id": 42, + "options": { + "displayMode": "gradient", + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "title": "Top 10 Namespaces by Memory", + "type": "bargauge", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "topk(10, sum by(namespace)(container_memory_working_set_bytes{container!=\"\"}))", + "refId": "A", + "format": "table", + "instant": true + } + ], + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "__name__": true, + "container": true, + "endpoint": true, + "instance": true, + "job": true, + "service": true, + "uid": true, + "id": true, + "metrics_path": true, + "node": true + }, + "renameByName": {} + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "Value", + "desc": true + } + ] + } + }, + { + "id": "rowsToFields", + "options": {} + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 23 + }, + "id": 43, + "options": { + "displayMode": "gradient", + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "left", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "title": "Top 10 Pods by Memory", + "type": "bargauge", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "topk(10, container_memory_working_set_bytes{container!=\"\"})", + "refId": "A", + "format": "table", + "instant": true + } + ], + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "__name__": true, + "container": true, + "endpoint": true, + "instance": true, + "job": true, + "namespace": true, + "service": true, + "uid": true, + "id": true, + "image": true, + "metrics_path": true, + "name": true, + "node": true + }, + "renameByName": {} + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "Value", + "desc": true + } + ] + } + }, + { + "id": "rowsToFields", + "options": {} + } + ] + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 80, + "panels": [], + "title": "Key Services", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "text": "DOWN" + }, + "1": { + "color": "green", + "text": "UP" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 32 + }, + "id": 81, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "title": "API Server", + "type": "stat", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "up{job=\"apiserver\"}", + "legendFormat": "", + "refId": "A" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "percent", + "noValue": "0", + "decimals": 2 + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 32 + }, + "id": 82, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "title": "API Server Error %", + "type": "stat", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(apiserver_request_total{code=~\"5..\"}[5m])) / sum(rate(apiserver_request_total[5m])) * 100", + "legendFormat": "", + "refId": "A" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 50 + }, + { + "color": "green", + "value": 80 + } + ] + }, + "unit": "percent", + "decimals": 1 + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 32 + }, + "id": 83, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "title": "DNS Cache Hit %", + "type": "stat", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(coredns_cache_hits_total[5m])) / (sum(rate(coredns_cache_hits_total[5m])) + sum(rate(coredns_cache_misses_total[5m]))) * 100", + "legendFormat": "", + "refId": "A" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + } + ] + }, + "unit": "reqps", + "decimals": 1 + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 32 + }, + "id": 84, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "title": "Traefik req/s", + "type": "stat", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(traefik_entrypoint_requests_total[5m]))", + "legendFormat": "", + "refId": "A" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 5 + }, + { + "color": "red", + "value": 15 + } + ] + }, + "unit": "percent", + "noValue": "0", + "decimals": 1 + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 32 + }, + "id": 85, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "title": "Traefik Error %", + "type": "stat", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(traefik_entrypoint_requests_total{code=~\"[45]..\"}[5m])) / sum(rate(traefik_entrypoint_requests_total[5m])) * 100", + "legendFormat": "", + "refId": "A" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 50 + }, + { + "color": "green", + "value": 80 + } + ] + }, + "unit": "percent", + "decimals": 1 + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 20, + "y": 32 + }, + "id": 86, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "title": "Registry Cache %", + "type": "stat", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(registry_registry_storage_cache_total{type=\"Hit\"}) / sum(registry_registry_storage_cache_total) * 100", + "legendFormat": "", + "refId": "A" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + } + ] + }, + "decimals": 0 + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 36 + }, + "id": 87, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "title": "Open Connections", + "type": "stat", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(traefik_open_connections)", + "legendFormat": "", + "refId": "A" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "s", + "decimals": 2 + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 36 + }, + "id": 88, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "title": "API Latency P99", + "type": "stat", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{verb!~\"WATCH|CONNECT\"}[5m])) by (le))", + "legendFormat": "", + "refId": "A" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + } + ] + }, + "unit": "reqps", + "decimals": 1 + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 36 + }, + "id": 89, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "title": "DNS req/s", + "type": "stat", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(coredns_dns_requests_total[5m]))", + "legendFormat": "", + "refId": "A" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 0.1 + }, + { + "color": "red", + "value": 0.5 + } + ] + }, + "unit": "s", + "decimals": 3 + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 36 + }, + "id": 90, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "title": "DNS Latency P99", + "type": "stat", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_seconds_bucket[5m])) by (le))", + "legendFormat": "", + "refId": "A" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "s", + "decimals": 2 + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 36 + }, + "id": 91, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "title": "Ingress Latency P99", + "type": "stat", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.99, sum(rate(traefik_entrypoint_request_duration_seconds_bucket[5m])) by (le))", + "legendFormat": "", + "refId": "A" + } + ] + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 1, + "panels": [], + "title": "Cluster Summary", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "green", + "value": 5 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 41 + }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "title": "Nodes Ready", + "type": "stat", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\"})", + "legendFormat": "Ready", + "refId": "A" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 41 + }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "title": "Total Pods", + "type": "stat", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "count(kube_pod_info)", + "legendFormat": "Pods", + "refId": "A" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "green", + "text": "0" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "noValue": "0" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 41 + }, + "id": 4, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "title": "Firing Alerts", + "type": "stat", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "count(ALERTS{alertstate=\"firing\"}) OR vector(0)", + "legendFormat": "Firing", + "refId": "A" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 60 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 41 + }, + "id": 5, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "title": "Cluster CPU %", + "type": "gauge", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "avg(1 - rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100", + "legendFormat": "CPU", + "refId": "A" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 60 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 41 + }, + "id": 6, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "title": "Cluster Memory %", + "type": "gauge", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(1 - sum(node_memory_MemAvailable_bytes) / sum(node_memory_MemTotal_bytes)) * 100", + "legendFormat": "Memory", + "refId": "A" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 85 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 20, + "y": 41 + }, + "id": 7, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "title": "Cluster Disk %", + "type": "gauge", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(1 - sum(node_filesystem_avail_bytes{mountpoint=\"/\",fstype!~\"tmpfs\"}) / sum(node_filesystem_size_bytes{mountpoint=\"/\",fstype!~\"tmpfs\"})) * 100", + "legendFormat": "Disk", + "refId": "A" + } + ] + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 45 + }, + "id": 20, + "panels": [], + "title": "Resource Governance \u2014 Quota Utilization", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, "custom": { "align": "auto", - "cellOptions": { "type": "auto" }, + "cellOptions": { + "type": "auto" + }, "inspect": false }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "green", "value": null }, - { "color": "orange", "value": 70 }, - { "color": "red", "value": 90 } + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 90 + } ] } }, "overrides": [ { - "matcher": { "id": "byName", "options": "Namespace" }, - "properties": [{ "id": "custom.width", "value": 160 }] - }, - { - "matcher": { "id": "byName", "options": "Resource" }, - "properties": [{ "id": "custom.width", "value": 120 }] - }, - { - "matcher": { "id": "byName", "options": "Usage %" }, + "matcher": { + "id": "byName", + "options": "Namespace" + }, "properties": [ - { "id": "unit", "value": "percent" }, - { "id": "decimals", "value": 1 }, - { "id": "custom.cellOptions", "value": { "mode": "gradient", "type": "gauge" } } + { + "id": "custom.width", + "value": 160 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Resource" + }, + "properties": [ + { + "id": "custom.width", + "value": 120 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Usage %" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + }, + { + "id": "decimals", + "value": 1 + }, + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "gauge" + } + } ] } ] }, - "gridPos": { "h": 8, "w": 12, "x": 0, "y": 15 }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 46 + }, "id": 21, "options": { "cellHeight": "sm", - "footer": { "countRows": false, "fields": "", "reducer": ["sum"], "show": false }, + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, "showHeader": true, - "sortBy": [{ "desc": true, "displayName": "Usage %" }] + "sortBy": [ + { + "desc": true, + "displayName": "Usage %" + } + ] }, "title": "Quota Usage by Namespace", "type": "table", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "expr": "kube_resourcequota{type=\"used\"}", "format": "table", "instant": true, @@ -581,7 +2950,10 @@ "refId": "USED" }, { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "expr": "kube_resourcequota{type=\"hard\"}", "format": "table", "instant": true, @@ -598,15 +2970,29 @@ "id": "calculateField", "options": { "alias": "Usage %", - "binary": { "left": "Value #USED", "operator": "/", "right": "Value #HARD", "reducer": "sum" }, + "binary": { + "left": "Value #USED", + "operator": "/", + "right": "Value #HARD", + "reducer": "sum" + }, "mode": "binary", - "reduce": { "reducer": "sum" } + "reduce": { + "reducer": "sum" + } } }, { "id": "organize", "options": { - "excludeByName": { "Time": true, "__name__": true, "job": true, "instance": true, "type": true, "resourcequota": true }, + "excludeByName": { + "Time": true, + "__name__": true, + "job": true, + "instance": true, + "type": true, + "resourcequota": true + }, "renameByName": { "namespace": "Namespace", "resource": "Resource", @@ -621,7 +3007,12 @@ "options": { "filters": [ { - "config": { "id": "greater", "options": { "value": 0 } }, + "config": { + "id": "greater", + "options": { + "value": 0 + } + }, "fieldName": "Hard Limit" } ], @@ -632,26 +3023,45 @@ ] }, { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "max": 100, "min": 0, "thresholds": { "mode": "absolute", "steps": [ - { "color": "green", "value": null }, - { "color": "orange", "value": 70 }, - { "color": "red", "value": 90 } + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 90 + } ] }, "unit": "percent" }, "overrides": [] }, - "gridPos": { "h": 8, "w": 12, "x": 12, "y": 15 }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 46 + }, "id": 22, "options": { "displayMode": "gradient", @@ -660,7 +3070,13 @@ "minVizWidth": 8, "namePlacement": "auto", "orientation": "horizontal", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "showUnfilled": true, "sizing": "auto", "valueMode": "color" @@ -669,53 +3085,124 @@ "type": "bargauge", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "expr": "topk(15, kube_resourcequota{type=\"used\",resource=~\"requests.cpu|requests.memory|limits.cpu|limits.memory|pods\"} / kube_resourcequota{type=\"hard\",resource=~\"requests.cpu|requests.memory|limits.cpu|limits.memory|pods\"} * 100 > 0)", - "legendFormat": "{{namespace}} / {{resource}}", - "refId": "A" + "refId": "A", + "format": "table", + "instant": true + } + ], + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "__name__": true, + "job": true, + "instance": true, + "endpoint": true, + "service": true, + "uid": true, + "type": true, + "resourcequota": true, + "metrics_path": true, + "resource": true + }, + "renameByName": {} + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "Value", + "desc": true + } + ] + } + }, + { + "id": "rowsToFields", + "options": {} } ] }, { "collapsed": false, - "gridPos": { "h": 1, "w": 24, "x": 0, "y": 23 }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 54 + }, "id": 30, "panels": [], "title": "Pod Health", "type": "row" }, { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "green", "value": null }, - { "color": "red", "value": 1 } + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } ] }, "noValue": "0" }, "overrides": [] }, - "gridPos": { "h": 4, "w": 3, "x": 0, "y": 24 }, + "gridPos": { + "h": 4, + "w": 3, + "x": 0, + "y": 55 + }, "id": 31, "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "textMode": "auto" }, "title": "CrashLooping Pods", "type": "stat", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "expr": "count(increase(kube_pod_container_status_restarts_total[1h]) > 5) OR vector(0)", "legendFormat": "", "refId": "A" @@ -723,38 +3210,66 @@ ] }, { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "green", "value": null }, - { "color": "orange", "value": 1 }, - { "color": "red", "value": 5 } + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1 + }, + { + "color": "red", + "value": 5 + } ] }, "noValue": "0" }, "overrides": [] }, - "gridPos": { "h": 4, "w": 3, "x": 3, "y": 24 }, + "gridPos": { + "h": 4, + "w": 3, + "x": 3, + "y": 55 + }, "id": 32, "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "textMode": "auto" }, "title": "OOMKilled (24h)", "type": "stat", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "expr": "sum(increase(kube_pod_container_status_last_terminated_reason{reason=\"OOMKilled\"}[24h])) OR vector(0)", "legendFormat": "", "refId": "A" @@ -762,37 +3277,62 @@ ] }, { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "green", "value": null }, - { "color": "red", "value": 1 } + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } ] }, "noValue": "0" }, "overrides": [] }, - "gridPos": { "h": 4, "w": 3, "x": 6, "y": 24 }, + "gridPos": { + "h": 4, + "w": 3, + "x": 6, + "y": 55 + }, "id": 33, "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "textMode": "auto" }, "title": "Failed Pods", "type": "stat", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "expr": "count(kube_pod_status_phase{phase=\"Failed\"}) OR vector(0)", "legendFormat": "", "refId": "A" @@ -800,38 +3340,66 @@ ] }, { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "green", "value": null }, - { "color": "orange", "value": 1 }, - { "color": "red", "value": 5 } + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1 + }, + { + "color": "red", + "value": 5 + } ] }, "noValue": "0" }, "overrides": [] }, - "gridPos": { "h": 4, "w": 3, "x": 9, "y": 24 }, + "gridPos": { + "h": 4, + "w": 3, + "x": 9, + "y": 55 + }, "id": 34, "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "textMode": "auto" }, "title": "Pending Pods", "type": "stat", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "expr": "count(kube_pod_status_phase{phase=\"Pending\"}) OR vector(0)", "legendFormat": "", "refId": "A" @@ -839,10 +3407,15 @@ ] }, { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "palette-classic" }, + "color": { + "mode": "palette-classic" + }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, @@ -854,39 +3427,71 @@ "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", - "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, - "scaleDistribution": { "type": "linear" }, + "scaleDistribution": { + "type": "linear" + }, "showPoints": "never", "spanNulls": false, - "stacking": { "group": "A", "mode": "none" }, - "thresholdsStyle": { "mode": "off" } + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "green", "value": null } + { + "color": "green", + "value": null + } ] }, "min": 0 }, "overrides": [] }, - "gridPos": { "h": 8, "w": 12, "x": 12, "y": 24 }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 55 + }, "id": 35, "options": { - "legend": { "calcs": ["sum"], "displayMode": "table", "placement": "bottom", "showLegend": true }, - "tooltip": { "mode": "multi", "sort": "desc" } + "legend": { + "calcs": [ + "sum" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } }, "title": "Restart Rate (24h)", "type": "timeseries", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "expr": "sum(increase(kube_pod_container_status_restarts_total[1h]))", "legendFormat": "Total Restarts / hour", "refId": "A" @@ -894,55 +3499,116 @@ ] }, { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "custom": { "align": "auto", - "cellOptions": { "type": "auto" }, + "cellOptions": { + "type": "auto" + }, "inspect": false }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "green", "value": null }, - { "color": "orange", "value": 5 }, - { "color": "red", "value": 20 } + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 5 + }, + { + "color": "red", + "value": 20 + } ] } }, "overrides": [ { - "matcher": { "id": "byName", "options": "Namespace" }, - "properties": [{ "id": "custom.width", "value": 140 }] - }, - { - "matcher": { "id": "byName", "options": "Pod" }, - "properties": [{ "id": "custom.width", "value": 260 }] - }, - { - "matcher": { "id": "byName", "options": "Restarts" }, + "matcher": { + "id": "byName", + "options": "Namespace" + }, "properties": [ - { "id": "custom.cellOptions", "value": { "mode": "gradient", "type": "gauge" } } + { + "id": "custom.width", + "value": 140 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Pod" + }, + "properties": [ + { + "id": "custom.width", + "value": 260 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Restarts" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "gauge" + } + } ] } ] }, - "gridPos": { "h": 8, "w": 12, "x": 0, "y": 28 }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 63 + }, "id": 36, "options": { "cellHeight": "sm", - "footer": { "countRows": false, "fields": "", "reducer": ["sum"], "show": false }, + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, "showHeader": true, - "sortBy": [{ "desc": true, "displayName": "Restarts" }] + "sortBy": [ + { + "desc": true, + "displayName": "Restarts" + } + ] }, "title": "High-Restart Pods", "type": "table", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "expr": "topk(15, kube_pod_container_status_restarts_total)", "format": "table", "instant": true, @@ -954,7 +3620,16 @@ { "id": "organize", "options": { - "excludeByName": { "Time": true, "__name__": true, "job": true, "instance": true, "uid": true, "container": true, "endpoint": true, "service": true }, + "excludeByName": { + "Time": true, + "__name__": true, + "job": true, + "instance": true, + "uid": true, + "container": true, + "endpoint": true, + "service": true + }, "renameByName": { "namespace": "Namespace", "pod": "Pod", @@ -966,203 +3641,160 @@ }, { "collapsed": false, - "gridPos": { "h": 1, "w": 24, "x": 0, "y": 36 }, - "id": 40, - "panels": [], - "title": "Top Resource Consumers", - "type": "row" - }, - { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "fieldConfig": { - "defaults": { - "color": { "mode": "palette-classic" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { "color": "green", "value": null } - ] - }, - "unit": "short" - }, - "overrides": [] + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 71 }, - "gridPos": { "h": 8, "w": 8, "x": 0, "y": 37 }, - "id": 41, - "options": { - "displayMode": "gradient", - "maxVizHeight": 300, - "minVizHeight": 16, - "minVizWidth": 8, - "namePlacement": "auto", - "orientation": "horizontal", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, - "showUnfilled": true, - "sizing": "auto", - "valueMode": "color" - }, - "title": "Top 10 Namespaces by CPU", - "type": "bargauge", - "targets": [ - { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "expr": "topk(10, sum by(namespace)(rate(container_cpu_usage_seconds_total{container!=\"\"}[5m])))", - "legendFormat": "{{namespace}}", - "refId": "A" - } - ] - }, - { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "fieldConfig": { - "defaults": { - "color": { "mode": "palette-classic" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { "color": "green", "value": null } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { "h": 8, "w": 8, "x": 8, "y": 37 }, - "id": 42, - "options": { - "displayMode": "gradient", - "maxVizHeight": 300, - "minVizHeight": 16, - "minVizWidth": 8, - "namePlacement": "auto", - "orientation": "horizontal", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, - "showUnfilled": true, - "sizing": "auto", - "valueMode": "color" - }, - "title": "Top 10 Namespaces by Memory", - "type": "bargauge", - "targets": [ - { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "expr": "topk(10, sum by(namespace)(container_memory_working_set_bytes{container!=\"\"}))", - "legendFormat": "{{namespace}}", - "refId": "A" - } - ] - }, - { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "fieldConfig": { - "defaults": { - "color": { "mode": "palette-classic" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { "color": "green", "value": null } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { "h": 8, "w": 8, "x": 16, "y": 37 }, - "id": 43, - "options": { - "displayMode": "gradient", - "maxVizHeight": 300, - "minVizHeight": 16, - "minVizWidth": 8, - "namePlacement": "auto", - "orientation": "horizontal", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, - "showUnfilled": true, - "sizing": "auto", - "valueMode": "color" - }, - "title": "Top 10 Pods by Memory", - "type": "bargauge", - "targets": [ - { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "expr": "topk(10, container_memory_working_set_bytes{container!=\"\"})", - "legendFormat": "{{namespace}}/{{pod}}", - "refId": "A" - } - ] - }, - { - "collapsed": false, - "gridPos": { "h": 1, "w": 24, "x": 0, "y": 45 }, "id": 50, "panels": [], "title": "Storage", "type": "row" }, { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "custom": { "align": "auto", - "cellOptions": { "type": "auto" }, + "cellOptions": { + "type": "auto" + }, "inspect": false }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "green", "value": null }, - { "color": "orange", "value": 70 }, - { "color": "red", "value": 85 } + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 85 + } ] } }, "overrides": [ { - "matcher": { "id": "byName", "options": "Namespace" }, - "properties": [{ "id": "custom.width", "value": 140 }] - }, - { - "matcher": { "id": "byName", "options": "PVC" }, - "properties": [{ "id": "custom.width", "value": 260 }] - }, - { - "matcher": { "id": "byName", "options": "Used %" }, + "matcher": { + "id": "byName", + "options": "Namespace" + }, "properties": [ - { "id": "unit", "value": "percent" }, - { "id": "decimals", "value": 1 }, - { "id": "custom.cellOptions", "value": { "mode": "gradient", "type": "gauge" } } + { + "id": "custom.width", + "value": 140 + } ] }, { - "matcher": { "id": "byName", "options": "Used" }, - "properties": [{ "id": "unit", "value": "bytes" }] + "matcher": { + "id": "byName", + "options": "PVC" + }, + "properties": [ + { + "id": "custom.width", + "value": 260 + } + ] }, { - "matcher": { "id": "byName", "options": "Capacity" }, - "properties": [{ "id": "unit", "value": "bytes" }] + "matcher": { + "id": "byName", + "options": "Used %" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + }, + { + "id": "decimals", + "value": 1 + }, + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "gauge" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Used" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Capacity" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] } ] }, - "gridPos": { "h": 8, "w": 12, "x": 0, "y": 46 }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 72 + }, "id": 51, "options": { "cellHeight": "sm", - "footer": { "countRows": false, "fields": "", "reducer": ["sum"], "show": false }, + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, "showHeader": true, - "sortBy": [{ "desc": true, "displayName": "Used %" }] + "sortBy": [ + { + "desc": true, + "displayName": "Used %" + } + ] }, "title": "PV Usage Table", "type": "table", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "expr": "kubelet_volume_stats_used_bytes", "format": "table", "instant": true, @@ -1170,7 +3802,10 @@ "refId": "USED" }, { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "expr": "kubelet_volume_stats_capacity_bytes", "format": "table", "instant": true, @@ -1187,14 +3822,27 @@ "id": "calculateField", "options": { "alias": "Used %", - "binary": { "left": "Value #USED", "operator": "/", "right": "Value #CAP" }, + "binary": { + "left": "Value #USED", + "operator": "/", + "right": "Value #CAP" + }, "mode": "binary" } }, { "id": "organize", "options": { - "excludeByName": { "Time": true, "__name__": true, "job": true, "instance": true, "endpoint": true, "service": true, "node": true, "metrics_path": true }, + "excludeByName": { + "Time": true, + "__name__": true, + "job": true, + "instance": true, + "endpoint": true, + "service": true, + "node": true, + "metrics_path": true + }, "renameByName": { "namespace": "Namespace", "persistentvolumeclaim": "PVC", @@ -1209,7 +3857,12 @@ "options": { "filters": [ { - "config": { "id": "greater", "options": { "value": 0 } }, + "config": { + "id": "greater", + "options": { + "value": 0 + } + }, "fieldName": "Capacity" } ], @@ -1220,26 +3873,45 @@ ] }, { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "max": 100, "min": 0, "thresholds": { "mode": "absolute", "steps": [ - { "color": "green", "value": null }, - { "color": "orange", "value": 70 }, - { "color": "red", "value": 85 } + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 85 + } ] }, "unit": "percent" }, "overrides": [] }, - "gridPos": { "h": 8, "w": 12, "x": 12, "y": 46 }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 72 + }, "id": 52, "options": { "displayMode": "gradient", @@ -1248,7 +3920,13 @@ "minVizWidth": 8, "namePlacement": "auto", "orientation": "horizontal", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "showUnfilled": true, "sizing": "auto", "valueMode": "color" @@ -1257,7 +3935,10 @@ "type": "bargauge", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "expr": "kubelet_volume_stats_used_bytes / kubelet_volume_stats_capacity_bytes * 100", "legendFormat": "{{namespace}}/{{persistentvolumeclaim}}", "refId": "A" @@ -1265,10 +3946,15 @@ ] }, { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "palette-classic" }, + "color": { + "mode": "palette-classic" + }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, @@ -1280,105 +3966,232 @@ "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", - "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, - "scaleDistribution": { "type": "linear" }, + "scaleDistribution": { + "type": "linear" + }, "showPoints": "never", - "spanNulls": false, - "stacking": { "group": "A", "mode": "none" }, - "thresholdsStyle": { "mode": "off" } + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "green", "value": null }, - { "color": "red", "value": 85 } + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 85 + } ] }, "unit": "percent", "max": 100, "min": 0 }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^PVE /" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 80 }, - "gridPos": { "h": 8, "w": 24, "x": 0, "y": 54 }, "id": 53, "options": { - "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, - "tooltip": { "mode": "multi", "sort": "desc" } + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } }, "title": "Node Disk Usage (24h)", "type": "timeseries", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "expr": "(1 - node_filesystem_avail_bytes{mountpoint=\"/\",fstype!~\"tmpfs\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!~\"tmpfs\"}) * 100", "legendFormat": "{{instance}}", "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "pve_disk_usage_bytes{id=~\"storage/.*\"} / pve_disk_size_bytes{id=~\"storage/.*\"} * 100", + "legendFormat": "PVE {{id}}", + "refId": "B" } ] }, { "collapsed": false, - "gridPos": { "h": 1, "w": 24, "x": 0, "y": 62 }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 88 + }, "id": 60, "panels": [], "title": "Certificate Expiry", "type": "row" }, { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "custom": { "align": "auto", - "cellOptions": { "type": "auto" }, + "cellOptions": { + "type": "auto" + }, "inspect": false }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "red", "value": null }, - { "color": "orange", "value": 7 }, - { "color": "green", "value": 30 } + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 7 + }, + { + "color": "green", + "value": 30 + } ] } }, "overrides": [ { - "matcher": { "id": "byName", "options": "Certificate" }, + "matcher": { + "id": "byName", + "options": "Certificate" + }, "properties": [ - { "id": "custom.width", "value": 400 } + { + "id": "custom.width", + "value": 400 + } ] }, { - "matcher": { "id": "byName", "options": "Days Remaining" }, + "matcher": { + "id": "byName", + "options": "Days Remaining" + }, "properties": [ - { "id": "decimals", "value": 0 }, - { "id": "custom.cellOptions", "value": { "mode": "gradient", "type": "gauge" } } + { + "id": "decimals", + "value": 0 + }, + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "gauge" + } + } ] } ] }, - "gridPos": { "h": 8, "w": 24, "x": 0, "y": 63 }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 89 + }, "id": 61, "options": { "cellHeight": "sm", - "footer": { "countRows": false, "fields": "", "reducer": ["sum"], "show": false }, + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, "showHeader": true, - "sortBy": [{ "desc": false, "displayName": "Days Remaining" }] + "sortBy": [ + { + "desc": false, + "displayName": "Days Remaining" + } + ] }, "title": "TLS Certificate Expiry", "type": "table", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "expr": "(traefik_tls_certs_not_after - time()) / 86400", "format": "table", "instant": true, @@ -1390,7 +4203,15 @@ { "id": "organize", "options": { - "excludeByName": { "Time": true, "__name__": true, "job": true, "instance": true, "endpoint": true, "service": true, "sans": true }, + "excludeByName": { + "Time": true, + "__name__": true, + "job": true, + "instance": true, + "endpoint": true, + "service": true, + "sans": true + }, "renameByName": { "cn": "Certificate", "serial": "Serial", @@ -1402,323 +4223,23 @@ "id": "sortBy", "options": { "fields": {}, - "sort": [{ "field": "Days Remaining", "desc": false }] + "sort": [ + { + "field": "Days Remaining", + "desc": false + } + ] } } ] - }, - { - "collapsed": false, - "gridPos": { "h": 1, "w": 24, "x": 0, "y": 71 }, - "id": 70, - "panels": [], - "title": "Infrastructure", - "type": "row" - }, - { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "fieldConfig": { - "defaults": { - "color": { "mode": "thresholds" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { "color": "green", "value": null }, - { "color": "orange", "value": 70 }, - { "color": "red", "value": 85 } - ] - }, - "unit": "celsius" - }, - "overrides": [] - }, - "gridPos": { "h": 4, "w": 4, "x": 0, "y": 72 }, - "id": 71, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, - "textMode": "auto" - }, - "title": "GPU Temp", - "type": "stat", - "targets": [ - { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "expr": "nvidia_tesla_t4_DCGM_FI_DEV_GPU_TEMP", - "legendFormat": "GPU Temp", - "refId": "A" - } - ] - }, - { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "fieldConfig": { - "defaults": { - "color": { "mode": "thresholds" }, - "mappings": [], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { "color": "green", "value": null }, - { "color": "orange", "value": 60 }, - { "color": "red", "value": 85 } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { "h": 4, "w": 4, "x": 4, "y": 72 }, - "id": 72, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, - "showThresholdLabels": false, - "showThresholdMarkers": true - }, - "title": "GPU Util %", - "type": "gauge", - "targets": [ - { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "expr": "nvidia_tesla_t4_DCGM_FI_DEV_GPU_UTIL", - "legendFormat": "GPU Util", - "refId": "A" - } - ] - }, - { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "fieldConfig": { - "defaults": { - "color": { "mode": "thresholds" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { "color": "green", "value": null }, - { "color": "orange", "value": 12000 }, - { "color": "red", "value": 14000 } - ] - }, - "unit": "decmbytes" - }, - "overrides": [] - }, - "gridPos": { "h": 4, "w": 4, "x": 8, "y": 72 }, - "id": 73, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, - "textMode": "auto" - }, - "title": "GPU VRAM Used", - "type": "stat", - "targets": [ - { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "expr": "nvidia_tesla_t4_DCGM_FI_DEV_FB_USED", - "legendFormat": "VRAM", - "refId": "A" - } - ] - }, - { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "fieldConfig": { - "defaults": { - "color": { "mode": "thresholds" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { "color": "green", "value": null }, - { "color": "orange", "value": 400 }, - { "color": "red", "value": 600 } - ] - }, - "unit": "watt" - }, - "overrides": [] - }, - "gridPos": { "h": 4, "w": 4, "x": 12, "y": 72 }, - "id": 74, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, - "textMode": "auto" - }, - "title": "Server Power", - "type": "stat", - "targets": [ - { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "expr": "avg(r730_idrac_idrac_power_supply_input_watts)", - "legendFormat": "Power", - "refId": "A" - } - ] - }, - { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "fieldConfig": { - "defaults": { - "color": { "mode": "thresholds" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { "color": "red", "value": null }, - { "color": "orange", "value": 10 }, - { "color": "green", "value": 30 } - ] - }, - "unit": "m" - }, - "overrides": [] - }, - "gridPos": { "h": 4, "w": 4, "x": 16, "y": 72 }, - "id": 75, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, - "textMode": "auto" - }, - "title": "UPS Battery (min)", - "type": "stat", - "targets": [ - { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "expr": "ups_upsEstimatedMinutesRemaining", - "legendFormat": "Battery", - "refId": "A" - } - ] - }, - { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "fieldConfig": { - "defaults": { - "color": { "mode": "thresholds" }, - "mappings": [], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { "color": "red", "value": null }, - { "color": "orange", "value": 30 }, - { "color": "green", "value": 80 } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { "h": 4, "w": 4, "x": 20, "y": 72 }, - "id": 76, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, - "showThresholdLabels": false, - "showThresholdMarkers": true - }, - "title": "UPS Charge %", - "type": "gauge", - "targets": [ - { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "expr": "ups_upsEstimatedChargeRemaining", - "legendFormat": "Charge", - "refId": "A" - } - ] - }, - { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "fieldConfig": { - "defaults": { - "color": { "mode": "palette-classic" }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { "legend": false, "tooltip": false, "viz": false }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { "type": "linear" }, - "showPoints": "never", - "spanNulls": false, - "stacking": { "group": "A", "mode": "none" }, - "thresholdsStyle": { "mode": "off" } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { "color": "green", "value": null } - ] - }, - "unit": "watt", - "min": 0 - }, - "overrides": [] - }, - "gridPos": { "h": 8, "w": 24, "x": 0, "y": 76 }, - "id": 77, - "options": { - "legend": { "calcs": ["mean", "max", "min"], "displayMode": "table", "placement": "bottom", "showLegend": true }, - "tooltip": { "mode": "multi", "sort": "desc" } - }, - "title": "Power Draw (24h)", - "type": "timeseries", - "targets": [ - { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "expr": "avg_over_time(r730_idrac_idrac_power_supply_input_watts[$__rate_interval])", - "legendFormat": "PSU {{id}}", - "refId": "A" - }, - { - "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "expr": "avg_over_time(r730_idrac_idrac_power_control_avg_consumed_watts[$__rate_interval])", - "legendFormat": "Avg Consumed", - "refId": "B" - } - ] } ], "refresh": "5m", "schemaVersion": 39, - "tags": ["cluster-health", "daily-report"], + "tags": [ + "cluster-health", + "daily-report" + ], "templating": { "list": [ {