From c49e4d0a86bc5498ce4af4bcdb7121147ebbf8c2 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sun, 4 May 2025 11:22:12 +0000 Subject: [PATCH] add loki + alloy deployments for logs collection [ci skip] --- modules/kubernetes/monitoring/alloy.yaml | 100 + .../monitoring/dashboards/loki.json | 288 +++ .../monitoring/dashboards/registry.json | 1875 +++++++++++++++++ .../monitoring/k8s-monitoring-values.yaml | 78 + modules/kubernetes/monitoring/loki.yaml | 64 + modules/kubernetes/monitoring/main.tf | 84 + 6 files changed, 2489 insertions(+) create mode 100644 modules/kubernetes/monitoring/alloy.yaml create mode 100644 modules/kubernetes/monitoring/dashboards/loki.json create mode 100644 modules/kubernetes/monitoring/dashboards/registry.json create mode 100644 modules/kubernetes/monitoring/k8s-monitoring-values.yaml create mode 100644 modules/kubernetes/monitoring/loki.yaml diff --git a/modules/kubernetes/monitoring/alloy.yaml b/modules/kubernetes/monitoring/alloy.yaml new file mode 100644 index 00000000..c7b0caaa --- /dev/null +++ b/modules/kubernetes/monitoring/alloy.yaml @@ -0,0 +1,100 @@ +alloy: + configMap: + content: |- + // Write your Alloy config here: + logging { + level = "info" + format = "logfmt" + } + loki.write "default" { + endpoint { + url = "http://loki.monitoring.svc.cluster.local:3100/loki/api/v1/push" + } + } + + // discovery.kubernetes allows you to find scrape targets from Kubernetes resources. + // It watches cluster state and ensures targets are continually synced with what is currently running in your cluster. + discovery.kubernetes "pod" { + role = "pod" + } + + // discovery.relabel rewrites the label set of the input targets by applying one or more relabeling rules. + // If no rules are defined, then the input targets are exported as-is. + discovery.relabel "pod_logs" { + targets = discovery.kubernetes.pod.targets + + // Label creation - "namespace" field from "__meta_kubernetes_namespace" + rule { + source_labels = ["__meta_kubernetes_namespace"] + action = "replace" + target_label = "namespace" + } + + // Label creation - "pod" field from "__meta_kubernetes_pod_name" + rule { + source_labels = ["__meta_kubernetes_pod_name"] + action = "replace" + target_label = "pod" + } + + // Label creation - "container" field from "__meta_kubernetes_pod_container_name" + rule { + source_labels = ["__meta_kubernetes_pod_container_name"] + action = "replace" + target_label = "container" + } + + // Label creation - "app" field from "__meta_kubernetes_pod_label_app_kubernetes_io_name" + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + action = "replace" + target_label = "app" + } + + // Label creation - "job" field from "__meta_kubernetes_namespace" and "__meta_kubernetes_pod_container_name" + // Concatenate values __meta_kubernetes_namespace/__meta_kubernetes_pod_container_name + rule { + source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_container_name"] + action = "replace" + target_label = "job" + separator = "/" + replacement = "$1" + } + + // Label creation - "container" field from "__meta_kubernetes_pod_uid" and "__meta_kubernetes_pod_container_name" + // Concatenate values __meta_kubernetes_pod_uid/__meta_kubernetes_pod_container_name.log + rule { + source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] + action = "replace" + target_label = "__path__" + separator = "/" + replacement = "/var/log/pods/*$1/*.log" + } + + // Label creation - "container_runtime" field from "__meta_kubernetes_pod_container_id" + rule { + source_labels = ["__meta_kubernetes_pod_container_id"] + action = "replace" + target_label = "container_runtime" + regex = "^(\\S+):\\/\\/.+$" + replacement = "$1" + } + } + + // loki.source.kubernetes tails logs from Kubernetes containers using the Kubernetes API. + loki.source.kubernetes "pod_logs" { + targets = discovery.relabel.pod_logs.output + forward_to = [loki.process.pod_logs.receiver] + } + + // loki.process receives log entries from other Loki components, applies one or more processing stages, + // and forwards the results to the list of receivers in the component's arguments. + loki.process "pod_logs" { + stage.static_labels { + values = { + cluster = "default", + } + } + + forward_to = [loki.write.default.receiver] + } diff --git a/modules/kubernetes/monitoring/dashboards/loki.json b/modules/kubernetes/monitoring/dashboards/loki.json new file mode 100644 index 00000000..1a5027ef --- /dev/null +++ b/modules/kubernetes/monitoring/dashboards/loki.json @@ -0,0 +1,288 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "Logs collected from Kubernetes, stored in Loki", + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 15141, + "graphTooltip": 0, + "id": 25, + "links": [], + "panels": [ + { + "datasource": { + "type": "loki", + "uid": "fejvsai4fvvggf" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "fejvsai4fvvggf" + }, + "editorMode": "code", + "expr": "sum(count_over_time({namespace=~\"$namespace\", container =~\"$container\"} |= \"$query\" [$__interval]))", + "instant": false, + "legendFormat": "Log count", + "queryType": "range", + "range": true, + "refId": "A" + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "fejvsai4fvvggf" + }, + "description": "Logs from services running in Kubernetes", + "gridPos": { + "h": 25, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 2, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "fejvsai4fvvggf" + }, + "editorMode": "code", + "expr": "{namespace=~\"$namespace\", container =~\"$container\"} |= \"$query\"", + "queryType": "range", + "refId": "A" + } + ], + "type": "logs" + } + ], + "refresh": "5s", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "", + "value": "" + }, + "description": "String to search for", + "hide": 0, + "label": "Search Query", + "name": "query", + "options": [ + { + "selected": true, + "text": "", + "value": "" + } + ], + "query": "", + "skipUrlSync": false, + "type": "textbox" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": [ + "dbaas" + ], + "value": [ + "dbaas" + ] + }, + "datasource": { + "type": "loki", + "uid": "fejvsai4fvvggf" + }, + "definition": "label_values(namespace)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "namespace", + "options": [], + "query": "label_values(namespace)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "loki", + "uid": "fejvsai4fvvggf" + }, + "definition": "label_values(stream)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "stream", + "options": [], + "query": "label_values(stream)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "loki", + "uid": "fejvsai4fvvggf" + }, + "definition": "label_values(container)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "container", + "options": [], + "query": "label_values(container)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Loki Kubernetes Logs", + "uid": "o6-BGgnnk", + "version": 2, + "weekStart": "" + } \ No newline at end of file diff --git a/modules/kubernetes/monitoring/dashboards/registry.json b/modules/kubernetes/monitoring/dashboards/registry.json new file mode 100644 index 00000000..dde31a0b --- /dev/null +++ b/modules/kubernetes/monitoring/dashboards/registry.json @@ -0,0 +1,1875 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 9621, + "graphTooltip": 0, + "id": 24, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 22, + "panels": [], + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "refId": "A" + } + ], + "title": "General Counters, CPU, Memory and File Descriptor Stats", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "start time of the process", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "dateTimeFromNow" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 4, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "registry_process_start_time_seconds{app=\"$app\", instance=\"$instance\"} * 1000", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Start Time", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "The number of cache request received", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 5, + "x": 4, + "y": 1 + }, + "id": 16, + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "max" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "registry_registry_storage_cache_total{instance=\"$instance\",type=\"Request\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ type }}", + "refId": "A" + } + ], + "title": "Total Cache Requests", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "The number of cache request received (hits)", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 5, + "x": 9, + "y": 1 + }, + "id": 12, + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "max" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "registry_registry_storage_cache_total{instance=\"$instance\",type=\"Hit\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ type }}", + "refId": "A" + } + ], + "title": "Total Hit Cache Requests", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "The number of cache request received (miss)", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 5, + "x": 14, + "y": 1 + }, + "id": 14, + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "registry_registry_storage_cache_total{instance=\"$instance\",type=\"Miss\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ type }}", + "refId": "A" + } + ], + "title": "Total Missed Cache Requests", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Process Resident Memory Usage", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "rgb(31, 120, 193)", + "mode": "fixed" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 5, + "x": 19, + "y": 1 + }, + "id": 24, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "avg(registry_process_resident_memory_bytes{instance=\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Resident Memory Usage", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Average user and system CPU time spent in seconds.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 4 + }, + "id": 30, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "avg(rate(registry_process_cpu_seconds_total{instance=\"$instance\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "CPU Time", + "range": true, + "refId": "A" + } + ], + "title": "Average CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Virtual and Resident memory size in bytes, averages over 5 min interval", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 4 + }, + "id": 32, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "avg(rate(registry_process_resident_memory_bytes{instance=\"$instance\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Resident Mem", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "avg(rate(registry_process_virtual_memory_bytes{instance=\"$instance\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Virtual Mem", + "range": true, + "refId": "B" + } + ], + "title": "Average Memory Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Number of open file descriptors", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 4 + }, + "id": 34, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "registry_process_open_fds{instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Open FD", + "range": true, + "refId": "A" + } + ], + "title": "Open File Descriptors", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 20, + "panels": [], + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "refId": "A" + } + ], + "title": "Registry Metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "The HTTP requests", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 12 + }, + "id": 26, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "ceil(rate(registry_registry_http_requests_total{instance=\"$instance\"}[5m]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ handler }}", + "refId": "A" + } + ], + "title": "HTTP Requests", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "The in-flight HTTP requests", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 12 + }, + "id": 18, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "registry_registry_http_in_flight_requests{instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ handler }}", + "refId": "A" + } + ], + "title": "In-Flight HTTP Requests", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "The rate of registry cache requests", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 12 + }, + "id": 28, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "ceil(rate(registry_registry_storage_cache_total{instance=\"$instance\"}[5m]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ type }}", + "refId": "A" + } + ], + "title": "Registry Cache Requests Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 20 + }, + "id": 44, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "increase(registry_registry_storage_action_seconds_sum{instance=\"$instance\"}[2m]) * 1000", + "format": "time_series", + "instant": false, + "intervalFactor": 2, + "legendFormat": "{{ action }}", + "refId": "A", + "step": 10, + "target": "" + } + ], + "title": "Registry Action Latency", + "type": "timeseries" + }, + { + "cards": {}, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "timeseries", + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 20 + }, + "heatmap": {}, + "highlightCards": true, + "id": 36, + "legend": { + "show": false + }, + "options": { + "calculate": true, + "calculation": {}, + "cellGap": 2, + "cellValues": {}, + "color": { + "exponent": 0.5, + "fill": "#b4ff00", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Oranges", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": false + }, + "rowsFrame": { + "layout": "auto" + }, + "showValue": "never", + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "min": "0", + "reverse": false, + "unit": "short" + } + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "rate(registry_registry_http_request_duration_seconds_bucket{handler=\"blob\"}[10m])", + "format": "heatmap", + "intervalFactor": 1, + "range": true, + "refId": "A" + } + ], + "title": "HTTP Request Latencies in seconds (blob)", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "format": "short", + "logBase": 1, + "min": "0", + "show": true + }, + "yBucketBound": "auto" + }, + { + "cards": {}, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "timeseries", + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 20 + }, + "heatmap": {}, + "highlightCards": true, + "id": 38, + "legend": { + "show": false + }, + "options": { + "calculate": true, + "calculation": {}, + "cellGap": 2, + "cellValues": {}, + "color": { + "exponent": 0.5, + "fill": "#b4ff00", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Oranges", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": false + }, + "rowsFrame": { + "layout": "auto" + }, + "showValue": "never", + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "min": "0", + "reverse": false, + "unit": "short" + } + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "rate(http_request_duration_seconds_bucket{handler=\"base\"}[10m])", + "format": "heatmap", + "intervalFactor": 1, + "range": true, + "refId": "A" + } + ], + "title": "Upload HTTP Request Latencies in seconds (base)", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "format": "short", + "logBase": 1, + "min": "0", + "show": true + }, + "yBucketBound": "auto" + }, + { + "cards": {}, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "timeseries", + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 27 + }, + "heatmap": {}, + "highlightCards": true, + "id": 2, + "legend": { + "show": false + }, + "options": { + "calculate": true, + "calculation": {}, + "cellGap": 2, + "cellValues": {}, + "color": { + "exponent": 0.5, + "fill": "#b4ff00", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Oranges", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": false + }, + "rowsFrame": { + "layout": "auto" + }, + "showValue": "never", + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "min": "0", + "reverse": false, + "unit": "short" + } + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "rate(registry_http_request_duration_seconds_bucket{handler=\"blob_upload\"}[10m])", + "format": "heatmap", + "intervalFactor": 1, + "range": true, + "refId": "A" + } + ], + "title": "Upload HTTP Request Latencies in seconds (blob_upload)", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "format": "short", + "logBase": 1, + "min": "0", + "show": true + }, + "yBucketBound": "auto" + }, + { + "cards": {}, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "timeseries", + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 27 + }, + "heatmap": {}, + "highlightCards": true, + "id": 42, + "legend": { + "show": false + }, + "options": { + "calculate": true, + "calculation": {}, + "cellGap": 2, + "cellValues": {}, + "color": { + "exponent": 0.5, + "fill": "#b4ff00", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Oranges", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": false + }, + "rowsFrame": { + "layout": "auto" + }, + "showValue": "never", + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "min": "0", + "reverse": false, + "unit": "short" + } + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "rate(registry_registry_http_request_duration_seconds_bucket{handler=\"manifest\"}[10m])", + "format": "heatmap", + "intervalFactor": 1, + "refId": "A" + } + ], + "title": "Manifest HTTP Request Latencies in seconds (manifest)", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "format": "short", + "logBase": 1, + "min": "0", + "show": true + }, + "yBucketBound": "auto" + }, + { + "cards": {}, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "timeseries", + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 27 + }, + "heatmap": {}, + "highlightCards": true, + "id": 40, + "legend": { + "show": false + }, + "options": { + "calculate": true, + "calculation": {}, + "cellGap": 2, + "cellValues": {}, + "color": { + "exponent": 0.5, + "fill": "#b4ff00", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Oranges", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": false + }, + "rowsFrame": { + "layout": "auto" + }, + "showValue": "never", + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "min": "0", + "reverse": false, + "unit": "short" + } + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "rate(registry_http_request_duration_seconds_bucket{handler=\"catalog\"}[10m])", + "format": "heatmap", + "intervalFactor": 1, + "range": true, + "refId": "A" + } + ], + "title": "Catalog HTTP Request Latencies in seconds (catalog)", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "format": "short", + "logBase": 1, + "min": "0", + "show": true + }, + "yBucketBound": "auto" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [ + "docker", + "registry", + "internals" + ], + "templating": { + "list": [ + { + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "namespace", + "options": [], + "query": "query_result(registry_registry_http_in_flight_requests)", + "refresh": 2, + "regex": "/.*kubernetes_namespace=\"([^\"]+).*/", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "app", + "options": [], + "query": "query_result(registry_registry_http_in_flight_requests{kubernetes_namespace=\"$namespace\"})", + "refresh": 2, + "regex": "/.*app=\"([^\"]+)/", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": false, + "text": "192.168.1.10:5001", + "value": "192.168.1.10:5001" + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "instance", + "options": [], + "query": "query_result(up{app=\"$app\"})", + "refresh": 1, + "regex": "/.*instance=\"([^\"]+).*/", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Docker Registry", + "uid": "CoBSgj8iz", + "version": 5, + "weekStart": "" + } \ No newline at end of file diff --git a/modules/kubernetes/monitoring/k8s-monitoring-values.yaml b/modules/kubernetes/monitoring/k8s-monitoring-values.yaml new file mode 100644 index 00000000..6fda2edc --- /dev/null +++ b/modules/kubernetes/monitoring/k8s-monitoring-values.yaml @@ -0,0 +1,78 @@ +--- +cluster: + name: default + +destinations: + - name: loki + type: loki + url: http://loki-gateway.monitoring.svc.cluster.local/loki/api/v1/push + +clusterEvents: + enabled: false + collector: alloy-logs + namespaces: + - dbaas + - immich + - authentik + - mailserver + - crowdsec + - descheduler + - calibre + - monitoring + - ingress-nginx + - vaultwarden + +nodeLogs: + enabled: false + +podLogs: + enabled: true + gatherMethod: kubernetesApi + collector: alloy-logs + labelsToKeep: + [ + "app_kubernetes_io_name", + "container", + "instance", + "job", + "level", + "namespace", + "service_name", + "service_namespace", + "deployment_environment", + "deployment_environment_name", + ] + structuredMetadata: + pod: pod # Set structured metadata "pod" from label "pod" + namespaces: + - dbaas + - immich + - authentik + - mailserver + - crowdsec + - descheduler + - calibre + - monitoring + - ingress-nginx + - vaultwarden +# Collectors +alloy-singleton: + enabled: false + +alloy-metrics: + enabled: false + +alloy-logs: + enabled: true + # Required when using the Kubernetes API to pod logs + alloy: + mounts: + varlog: false + clustering: + enabled: true + +alloy-profiles: + enabled: false + +alloy-receiver: + enabled: false diff --git a/modules/kubernetes/monitoring/loki.yaml b/modules/kubernetes/monitoring/loki.yaml new file mode 100644 index 00000000..3684e34e --- /dev/null +++ b/modules/kubernetes/monitoring/loki.yaml @@ -0,0 +1,64 @@ +loki: + commonConfig: + replication_factor: 1 + schemaConfig: + configs: + - from: "2025-04-01" + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: loki_index_ + period: 24h + pattern_ingester: + enabled: true + limits_config: + allow_structured_metadata: true + volume_enabled: true + ruler: + enable_api: true + storage: + type: "filesystem" + persistence: + enabled: true + size: 15Gi + accessModes: + - ReadWriteOnce + # Auth requires a revers proxy providing basic auth + # https://grafana.com/docs/loki/latest/operations/authentication/ + auth_enabled: false + +minio: + enabled: false + +deploymentMode: SingleBinary + +singleBinary: + replicas: 1 + +# Zero out replica counts of other deployment modes +backend: + replicas: 0 +read: + replicas: 0 +write: + replicas: 0 + +ingester: + replicas: 0 +querier: + replicas: 0 +queryFrontend: + replicas: 0 +queryScheduler: + replicas: 0 +distributor: + replicas: 0 +compactor: + replicas: 0 +indexGateway: + replicas: 0 +bloomCompactor: + replicas: 0 +bloomGateway: + replicas: 0 diff --git a/modules/kubernetes/monitoring/main.tf b/modules/kubernetes/monitoring/main.tf index 0bc55f0e..3184abaa 100644 --- a/modules/kubernetes/monitoring/main.tf +++ b/modules/kubernetes/monitoring/main.tf @@ -526,3 +526,87 @@ resource "kubernetes_service" "snmp-exporter" { } } } + +resource "helm_release" "loki" { + namespace = "monitoring" + create_namespace = true + name = "loki" + + repository = "https://grafana.github.io/helm-charts" + chart = "loki" + + values = [templatefile("${path.module}/loki.yaml", {})] + atomic = true + timeout = 120 +} + +resource "kubernetes_persistent_volume" "loki" { + metadata { + name = "loki" + } + spec { + capacity = { + storage = "15Gi" + } + access_modes = ["ReadWriteOnce"] + persistent_volume_source { + nfs { + path = "/mnt/main/loki/loki" + server = "10.0.10.15" + } + } + persistent_volume_reclaim_policy = "Retain" + volume_mode = "Filesystem" + } +} + +resource "kubernetes_persistent_volume" "loki-minio" { + metadata { + name = "loki-minio" + } + spec { + capacity = { + storage = "15Gi" + } + access_modes = ["ReadWriteMany"] + persistent_volume_source { + nfs { + path = "/mnt/main/loki/minio" + server = "10.0.10.15" + } + } + persistent_volume_reclaim_policy = "Retain" + volume_mode = "Filesystem" + } +} + + +# https://grafana.com/docs/alloy/latest/configure/kubernetes/ +resource "helm_release" "alloy" { + namespace = "monitoring" + create_namespace = true + name = "alloy" + + repository = "https://grafana.github.io/helm-charts" + chart = "alloy" + + atomic = true +} + +# Increase open file limits as alloy is reading files: +# https://serverfault.com/questions/1137211/failed-to-create-fsnotify-watcher-too-many-open-files + +# run for all nodes using : +# for n in $(kbn | awk '{print $1}'); do echo $n; s wizard@$n 'sudo sysctl -w fs.inotify.max_user_watches=2099999999; sudo sysctl -w fs.inotify.max_user_instances=2099999999;sudo sysctl -w fs.inotify.max_queued_events=2099999999'; done + +resource "helm_release" "k8s-monitoring" { + namespace = "monitoring" + create_namespace = true + name = "k8s-monitoring" + + repository = "https://grafana.github.io/helm-charts" + chart = "k8s-monitoring" + + values = [templatefile("${path.module}/k8s-monitoring-values.yaml", {})] + atomic = true +}