From a2b1b0e817c31b464523412748b0ecbcaa9c101a Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sun, 29 Mar 2026 22:17:35 +0300 Subject: [PATCH] remove caretta network mapper to free 3Gi cluster memory Caretta eBPF DaemonSet was using 600Mi x 5 nodes = 3Gi total for non-critical network topology visualization. Removing it to free memory for novelapp and aiostreams which were stuck in Pending. --- .../monitoring/modules/monitoring/caretta.tf | 62 -- .../dashboards/caretta-dashboard.json | 861 ------------------ .../monitoring/modules/monitoring/grafana.tf | 1 - .../monitoring/prometheus_chart_values.tpl | 5 - stacks/platform/modules/monitoring/caretta.tf | 62 -- .../dashboards/caretta-dashboard.json | 861 ------------------ .../monitoring/prometheus_chart_values.tpl | 5 - 7 files changed, 1857 deletions(-) delete mode 100644 stacks/monitoring/modules/monitoring/caretta.tf delete mode 100644 stacks/monitoring/modules/monitoring/dashboards/caretta-dashboard.json delete mode 100644 stacks/platform/modules/monitoring/caretta.tf delete mode 100644 stacks/platform/modules/monitoring/dashboards/caretta-dashboard.json diff --git a/stacks/monitoring/modules/monitoring/caretta.tf b/stacks/monitoring/modules/monitoring/caretta.tf deleted file mode 100644 index cf8bfcae..00000000 --- a/stacks/monitoring/modules/monitoring/caretta.tf +++ /dev/null @@ -1,62 +0,0 @@ -resource "helm_release" "caretta" { - namespace = kubernetes_namespace.monitoring.metadata[0].name - create_namespace = true - name = "caretta" - - repository = "https://helm.groundcover.com/" - chart = "caretta" - version = "0.0.16" - - values = [yamlencode({ - grafana = { - enabled = false - } - victoria-metrics-single = { - enabled = false - } - tolerations = [ - { - key = "node-role.kubernetes.io/control-plane" - operator = "Exists" - effect = "NoSchedule" - }, - { - key = "nvidia.com/gpu" - operator = "Exists" - effect = "NoSchedule" - } - ] - resources = { - requests = { - cpu = "10m" - memory = "600Mi" - } - limits = { - memory = "600Mi" - } - } - })] -} - -resource "kubernetes_service" "caretta_metrics" { - metadata { - name = "caretta-metrics" - namespace = kubernetes_namespace.monitoring.metadata[0].name - labels = { - app = "caretta" - } - } - spec { - selector = { - app = "caretta" - } - port { - name = "metrics" - port = 7117 - target_port = 7117 - protocol = "TCP" - } - } -} - -# Caretta dashboard is now loaded via the grafana_dashboards for_each in grafana.tf diff --git a/stacks/monitoring/modules/monitoring/dashboards/caretta-dashboard.json b/stacks/monitoring/modules/monitoring/dashboards/caretta-dashboard.json deleted file mode 100644 index 5e04b727..00000000 --- a/stacks/monitoring/modules/monitoring/dashboards/caretta-dashboard.json +++ /dev/null @@ -1,861 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "grafana", - "uid": "-- Grafana --" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": null, - "links": [], - "liveNow": false, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "description": "", - "gridPos": { - "h": 28, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 2, - "interval": "15s", - "options": { - "nodes": { - "mainStatUnit": "" - }, - "edges": { - "mainStatUnit": "" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "increase((sum by (id, title, subTitle, detail__kind, color) (label_replace(label_replace(label_replace(label_replace(label_replace(label_replace(label_replace(label_replace(label_replace(label_replace(label_replace(label_replace(label_replace(label_replace(label_replace(label_replace(label_replace((label_replace(label_replace(label_replace(label_replace((caretta_links_observed{client_namespace=~\"$namespace\", client_kind=~\"$kind\", client_name=~\"$workload\", server_port=~\"$port\"} or caretta_links_observed{server_namespace=~\"$namespace\", server_kind=~\"$kind\", server_name=~\"$workload\", server_port=~\"$port\"}), \"detail__kind\", \"$1\", \"server_kind\", \"(.*)\"), \"subTitle\", \"$1\", \"server_namespace\", \"(.*)\"), \"title\", \"$1\", \"server_name\", \"(.*)\"), \"id\", \"$1\", \"server_id\", \"(.*)\") or label_replace(label_replace(label_replace(label_replace((caretta_links_observed{client_namespace=~\"$namespace\", client_kind=~\"$kind\", client_name=~\"$workload\", server_port=~\"$port\"} or caretta_links_observed{server_namespace=~\"$namespace\", server_kind=~\"$kind\", server_name=~\"$workload\", server_port=~\"$port\"}), \"detail__kind\", \"$1\", \"client_kind\", \"(.*)\"), \"subTitle\", \"$1\", \"client_namespace\", \"(.*)\"), \"title\", \"$1\", \"client_name\", \"(.*)\"), \"id\", \"$1\", \"client_id\", \"(.*)\")), \"color\", \"#8F8F8F\", \"subTitle\", \"(.*)\"), \"color\", \"#F2495C\", \"subTitle\", \"^external$\"), \"color\", \"#8AB8FF\", \"title\", \"^10\\\\..*\"), \"color\", \"#8AB8FF\", \"title\", \"^192\\\\.168\\\\..*\"), \"color\", \"#8AB8FF\", \"title\", \"^172\\\\.(1[6-9]|2[0-9]|3[01])\\\\..*\"), \"color\", \"#8AB8FF\", \"title\", \"^(0\\\\.0\\\\.0\\\\.0|localhost)$\"), \"color\", \"#8AB8FF\", \"subTitle\", \"^node$\"), \"color\", \"#FF9830\", \"subTitle\", \"^traefik$\"), \"color\", \"#5794F2\", \"subTitle\", \"^monitoring$\"), \"color\", \"#73BF69\", \"subTitle\", \"^dbaas$\"), \"color\", \"#B877D9\", \"subTitle\", \"^authentik$\"), \"color\", \"#FF7383\", \"subTitle\", \"^crowdsec$\"), \"color\", \"#FADE2A\", \"subTitle\", \"^uptime-kuma$\"), \"color\", \"#56A64B\", \"subTitle\", \"^immich$\"), \"color\", \"#C0D8FF\", \"subTitle\", \"^technitium$\"), \"color\", \"#FF6600\", \"subTitle\", \"^kyverno$\"), \"color\", \"#76B900\", \"subTitle\", \"^nvidia$\")))[$__range:$__interval]) > 0", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "nodes" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "increase((sum by (id, source, target, mainStat) ((label_replace(label_replace(label_replace(label_replace((caretta_links_observed{client_namespace=~\"$namespace\", client_kind=~\"$kind\", client_name=~\"$workload\", server_port=~\"$port\"} or caretta_links_observed{server_namespace=~\"$namespace\", server_kind=~\"$kind\", server_name=~\"$workload\", server_port=~\"$port\"}), \"id\", \"$1\", \"link_id\", \"(.*)\"), \"source\", \"$1\", \"client_id\", \"(.*)\"), \"target\", \"$1\", \"server_id\", \"(.*)\"), \"mainStat\", \"$1\", \"server_port\", \"(.*)\"))))[$__range:$__interval]) > 0", - "format": "table", - "hide": false, - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "edges" - } - ], - "title": "Service Map", - "type": "nodeGraph", - "fieldConfig": { - "defaults": {}, - "overrides": [] - } - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "blue", - "mode": "fixed" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "links": [], - "mappings": [] - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 6, - "x": 0, - "y": 28 - }, - "id": 4, - "options": { - "displayLabels": [ - "name" - ], - "legend": { - "displayMode": "list", - "placement": "right", - "showLegend": false - }, - "pieType": "donut", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "sum by (server_port) (increase((caretta_links_observed{client_namespace=~\"$namespace\", client_kind=~\"$kind\", client_name=~\"$workload\", server_port=~\"$port\"} or caretta_links_observed{server_namespace=~\"$namespace\", server_kind=~\"$kind\", server_name=~\"$workload\", server_port=~\"$port\"})[$__range:$__interval])) > 0", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Active Ports", - "type": "piechart" - }, - { - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 21, - "y": 36 - }, - "id": 10, - "options": { - "code": { - "language": "plaintext", - "showLineNumbers": false, - "showMiniMap": false - }, - "content": "\n \n
\n\n

\n\n
\n", - "mode": "markdown" - }, - "pluginVersion": "10.1.2", - "type": "text" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "purple", - "mode": "continuous-blues" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 9, - "x": 15, - "y": 28 - }, - "id": 8, - "options": { - "displayMode": "gradient", - "minVizHeight": 10, - "minVizWidth": 0, - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true, - "valueMode": "color" - }, - "pluginVersion": "10.1.2", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "topk(8, sum by (client_name) ((rate(caretta_links_observed{client_namespace=~\"$namespace\", client_kind=~\"$kind\", client_name=~\"$workload\", server_port=~\"$port\"}[$__range:$__interval]))))", - "format": "time_series", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "A" - } - ], - "title": "Top Throughput Workloads", - "type": "bargauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-blues" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 9, - "x": 6, - "y": 28 - }, - "id": 6, - "options": { - "colorMode": "background", - "graphMode": "area", - "justifyMode": "center", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "10.1.2", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "topk(7, sum by (client_name, server_name) ( rate( (caretta_links_observed{client_namespace=~\"$namespace\", client_kind=~\"$kind\", client_name=~\"$workload\", server_port=~\"$port\", client_kind!~\"(node|external)\",} or caretta_links_observed{server_namespace=~\"$namespace\", server_kind=~\"$kind\", server_name=~\"$workload\", server_port=~\"$port\", server_kind!~\"(node|external)\"})[$__range:$__interval]) ) )", - "format": "time_series", - "instant": true, - "legendFormat": "{{client_name}} \u2b82 {{server_name}}", - "range": false, - "refId": "A" - } - ], - "title": "Top Throughput Connections", - "type": "stat" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 40 - }, - "id": 11, - "title": "Network Flows (GoFlow2 / pfSense NetFlow)", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisLabel": "flows/s", - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "scheme", - "lineWidth": 2, - "pointSize": 5, - "showPoints": "never", - "spanNulls": true - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 41 - }, - "id": 12, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "multi" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "rate(goflow2_flow_process_nf_flowset_records_total{type=\"DataFlowSet\"}[5m])", - "legendFormat": "Flows/s ({{router}})", - "range": true, - "refId": "A" - } - ], - "title": "NetFlow Ingestion Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisLabel": "", - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "scheme", - "lineWidth": 2, - "pointSize": 5, - "showPoints": "never", - "spanNulls": true - }, - "unit": "Bps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 41 - }, - "id": 13, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "multi" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "rate(goflow2_flow_traffic_bytes_total[5m])", - "legendFormat": "Bytes/s from {{remote_ip}}", - "range": true, - "refId": "A" - } - ], - "title": "NetFlow Traffic Volume", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 10000 - }, - { - "color": "red", - "value": 100000 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 0, - "y": 49 - }, - "id": 14, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "goflow2_flow_process_nf_flowset_records_total{type=\"DataFlowSet\"}", - "legendFormat": "Total Flows", - "instant": true, - "refId": "A" - } - ], - "title": "Total Flows Processed", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 4, - "y": 49 - }, - "id": 15, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "goflow2_flow_process_nf_total", - "legendFormat": "Messages", - "instant": true, - "refId": "A" - } - ], - "title": "NetFlow Messages", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "value": null - } - ] - }, - "unit": "decbytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 8, - "y": 49 - }, - "id": 16, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "goflow2_flow_traffic_bytes_total", - "legendFormat": "Bytes", - "instant": true, - "refId": "A" - } - ], - "title": "Total NetFlow Bytes", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 2, - "pointSize": 5, - "showPoints": "never", - "spanNulls": true - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 12, - "x": 12, - "y": 49 - }, - "id": 17, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "multi" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "rate(goflow2_flow_process_nf_delay_seconds_sum[5m]) / rate(goflow2_flow_process_nf_delay_seconds_count[5m])", - "legendFormat": "Avg Delay", - "range": true, - "refId": "A" - } - ], - "title": "Flow Processing Delay", - "type": "timeseries" - } - ], - "refresh": "1h", - "schemaVersion": 38, - "style": "dark", - "tags": [ - "network", - "caretta", - "goflow2" - ], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "default", - "value": "default" - }, - "hide": 0, - "includeAll": false, - "label": "datasource", - "multi": false, - "name": "DS_PROMETHEUS", - "options": [], - "query": "prometheus", - "queryValue": "", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "allValue": "(.*)", - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "definition": "query_result(caretta_links_observed)", - "hide": 0, - "includeAll": true, - "multi": true, - "name": "namespace", - "options": [], - "query": { - "query": "query_result(caretta_links_observed)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "/.*_namespace=\"([^\"]*).*/", - "skipUrlSync": false, - "sort": 1, - "type": "query" - }, - { - "allValue": "(.*)", - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "definition": "query_result(caretta_links_observed)", - "hide": 0, - "includeAll": true, - "multi": true, - "name": "kind", - "options": [], - "query": { - "query": "query_result(caretta_links_observed)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "/.*_kind=\"([^\"]*).*/", - "skipUrlSync": false, - "sort": 0, - "type": "query" - }, - { - "allValue": "(.*)", - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "definition": "query_result(caretta_links_observed)", - "hide": 0, - "includeAll": true, - "label": "workload", - "multi": true, - "name": "workload", - "options": [], - "query": { - "query": "query_result(caretta_links_observed)", - "refId": "StandardVariableQuery" - }, - "refresh": 2, - "regex": "/.*_name=\"([^\"]*).*/", - "skipUrlSync": false, - "sort": 1, - "type": "query" - }, - { - "allValue": "(.*)", - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "definition": "label_values(server_port)", - "hide": 0, - "includeAll": true, - "label": "server port", - "multi": true, - "name": "port", - "options": [], - "query": { - "query": "label_values(server_port)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" - } - ] - }, - "time": { - "from": "now-15m", - "to": "now" - }, - "timepicker": {}, - "timezone": "", - "title": "Network Observability", - "uid": "network-observability", - "version": 2, - "weekStart": "" -} \ No newline at end of file diff --git a/stacks/monitoring/modules/monitoring/grafana.tf b/stacks/monitoring/modules/monitoring/grafana.tf index fe3804cf..7587d9a4 100644 --- a/stacks/monitoring/modules/monitoring/grafana.tf +++ b/stacks/monitoring/modules/monitoring/grafana.tf @@ -112,7 +112,6 @@ locals { "pods.json" = "Cluster" "kube-state-metrics.json" = "Cluster" # Networking & DNS - "caretta-dashboard.json" = "Networking" "core_dns.json" = "Networking" "technitium-dns.json" = "Networking" "nginx_ingress.json" = "Networking" diff --git a/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl b/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl index c0df184b..219984df 100755 --- a/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl +++ b/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl @@ -1845,11 +1845,6 @@ extraScrapeConfigs: | - source_labels: [__meta_kubernetes_pod_name] target_label: instance metrics_path: '/metrics' - - job_name: 'caretta' - static_configs: - - targets: - - "caretta-metrics.monitoring.svc.cluster.local:7117" - metrics_path: '/metrics' - job_name: 'goflow2' static_configs: - targets: diff --git a/stacks/platform/modules/monitoring/caretta.tf b/stacks/platform/modules/monitoring/caretta.tf deleted file mode 100644 index cf8bfcae..00000000 --- a/stacks/platform/modules/monitoring/caretta.tf +++ /dev/null @@ -1,62 +0,0 @@ -resource "helm_release" "caretta" { - namespace = kubernetes_namespace.monitoring.metadata[0].name - create_namespace = true - name = "caretta" - - repository = "https://helm.groundcover.com/" - chart = "caretta" - version = "0.0.16" - - values = [yamlencode({ - grafana = { - enabled = false - } - victoria-metrics-single = { - enabled = false - } - tolerations = [ - { - key = "node-role.kubernetes.io/control-plane" - operator = "Exists" - effect = "NoSchedule" - }, - { - key = "nvidia.com/gpu" - operator = "Exists" - effect = "NoSchedule" - } - ] - resources = { - requests = { - cpu = "10m" - memory = "600Mi" - } - limits = { - memory = "600Mi" - } - } - })] -} - -resource "kubernetes_service" "caretta_metrics" { - metadata { - name = "caretta-metrics" - namespace = kubernetes_namespace.monitoring.metadata[0].name - labels = { - app = "caretta" - } - } - spec { - selector = { - app = "caretta" - } - port { - name = "metrics" - port = 7117 - target_port = 7117 - protocol = "TCP" - } - } -} - -# Caretta dashboard is now loaded via the grafana_dashboards for_each in grafana.tf diff --git a/stacks/platform/modules/monitoring/dashboards/caretta-dashboard.json b/stacks/platform/modules/monitoring/dashboards/caretta-dashboard.json deleted file mode 100644 index 5e04b727..00000000 --- a/stacks/platform/modules/monitoring/dashboards/caretta-dashboard.json +++ /dev/null @@ -1,861 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "grafana", - "uid": "-- Grafana --" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": null, - "links": [], - "liveNow": false, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "description": "", - "gridPos": { - "h": 28, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 2, - "interval": "15s", - "options": { - "nodes": { - "mainStatUnit": "" - }, - "edges": { - "mainStatUnit": "" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "increase((sum by (id, title, subTitle, detail__kind, color) (label_replace(label_replace(label_replace(label_replace(label_replace(label_replace(label_replace(label_replace(label_replace(label_replace(label_replace(label_replace(label_replace(label_replace(label_replace(label_replace(label_replace((label_replace(label_replace(label_replace(label_replace((caretta_links_observed{client_namespace=~\"$namespace\", client_kind=~\"$kind\", client_name=~\"$workload\", server_port=~\"$port\"} or caretta_links_observed{server_namespace=~\"$namespace\", server_kind=~\"$kind\", server_name=~\"$workload\", server_port=~\"$port\"}), \"detail__kind\", \"$1\", \"server_kind\", \"(.*)\"), \"subTitle\", \"$1\", \"server_namespace\", \"(.*)\"), \"title\", \"$1\", \"server_name\", \"(.*)\"), \"id\", \"$1\", \"server_id\", \"(.*)\") or label_replace(label_replace(label_replace(label_replace((caretta_links_observed{client_namespace=~\"$namespace\", client_kind=~\"$kind\", client_name=~\"$workload\", server_port=~\"$port\"} or caretta_links_observed{server_namespace=~\"$namespace\", server_kind=~\"$kind\", server_name=~\"$workload\", server_port=~\"$port\"}), \"detail__kind\", \"$1\", \"client_kind\", \"(.*)\"), \"subTitle\", \"$1\", \"client_namespace\", \"(.*)\"), \"title\", \"$1\", \"client_name\", \"(.*)\"), \"id\", \"$1\", \"client_id\", \"(.*)\")), \"color\", \"#8F8F8F\", \"subTitle\", \"(.*)\"), \"color\", \"#F2495C\", \"subTitle\", \"^external$\"), \"color\", \"#8AB8FF\", \"title\", \"^10\\\\..*\"), \"color\", \"#8AB8FF\", \"title\", \"^192\\\\.168\\\\..*\"), \"color\", \"#8AB8FF\", \"title\", \"^172\\\\.(1[6-9]|2[0-9]|3[01])\\\\..*\"), \"color\", \"#8AB8FF\", \"title\", \"^(0\\\\.0\\\\.0\\\\.0|localhost)$\"), \"color\", \"#8AB8FF\", \"subTitle\", \"^node$\"), \"color\", \"#FF9830\", \"subTitle\", \"^traefik$\"), \"color\", \"#5794F2\", \"subTitle\", \"^monitoring$\"), \"color\", \"#73BF69\", \"subTitle\", \"^dbaas$\"), \"color\", \"#B877D9\", \"subTitle\", \"^authentik$\"), \"color\", \"#FF7383\", \"subTitle\", \"^crowdsec$\"), \"color\", \"#FADE2A\", \"subTitle\", \"^uptime-kuma$\"), \"color\", \"#56A64B\", \"subTitle\", \"^immich$\"), \"color\", \"#C0D8FF\", \"subTitle\", \"^technitium$\"), \"color\", \"#FF6600\", \"subTitle\", \"^kyverno$\"), \"color\", \"#76B900\", \"subTitle\", \"^nvidia$\")))[$__range:$__interval]) > 0", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "nodes" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "increase((sum by (id, source, target, mainStat) ((label_replace(label_replace(label_replace(label_replace((caretta_links_observed{client_namespace=~\"$namespace\", client_kind=~\"$kind\", client_name=~\"$workload\", server_port=~\"$port\"} or caretta_links_observed{server_namespace=~\"$namespace\", server_kind=~\"$kind\", server_name=~\"$workload\", server_port=~\"$port\"}), \"id\", \"$1\", \"link_id\", \"(.*)\"), \"source\", \"$1\", \"client_id\", \"(.*)\"), \"target\", \"$1\", \"server_id\", \"(.*)\"), \"mainStat\", \"$1\", \"server_port\", \"(.*)\"))))[$__range:$__interval]) > 0", - "format": "table", - "hide": false, - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "edges" - } - ], - "title": "Service Map", - "type": "nodeGraph", - "fieldConfig": { - "defaults": {}, - "overrides": [] - } - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "blue", - "mode": "fixed" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "links": [], - "mappings": [] - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 6, - "x": 0, - "y": 28 - }, - "id": 4, - "options": { - "displayLabels": [ - "name" - ], - "legend": { - "displayMode": "list", - "placement": "right", - "showLegend": false - }, - "pieType": "donut", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "sum by (server_port) (increase((caretta_links_observed{client_namespace=~\"$namespace\", client_kind=~\"$kind\", client_name=~\"$workload\", server_port=~\"$port\"} or caretta_links_observed{server_namespace=~\"$namespace\", server_kind=~\"$kind\", server_name=~\"$workload\", server_port=~\"$port\"})[$__range:$__interval])) > 0", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Active Ports", - "type": "piechart" - }, - { - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 21, - "y": 36 - }, - "id": 10, - "options": { - "code": { - "language": "plaintext", - "showLineNumbers": false, - "showMiniMap": false - }, - "content": "\n \n
\n\n

\n\n
\n", - "mode": "markdown" - }, - "pluginVersion": "10.1.2", - "type": "text" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "purple", - "mode": "continuous-blues" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 9, - "x": 15, - "y": 28 - }, - "id": 8, - "options": { - "displayMode": "gradient", - "minVizHeight": 10, - "minVizWidth": 0, - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true, - "valueMode": "color" - }, - "pluginVersion": "10.1.2", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "topk(8, sum by (client_name) ((rate(caretta_links_observed{client_namespace=~\"$namespace\", client_kind=~\"$kind\", client_name=~\"$workload\", server_port=~\"$port\"}[$__range:$__interval]))))", - "format": "time_series", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "A" - } - ], - "title": "Top Throughput Workloads", - "type": "bargauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-blues" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 9, - "x": 6, - "y": 28 - }, - "id": 6, - "options": { - "colorMode": "background", - "graphMode": "area", - "justifyMode": "center", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "10.1.2", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "topk(7, sum by (client_name, server_name) ( rate( (caretta_links_observed{client_namespace=~\"$namespace\", client_kind=~\"$kind\", client_name=~\"$workload\", server_port=~\"$port\", client_kind!~\"(node|external)\",} or caretta_links_observed{server_namespace=~\"$namespace\", server_kind=~\"$kind\", server_name=~\"$workload\", server_port=~\"$port\", server_kind!~\"(node|external)\"})[$__range:$__interval]) ) )", - "format": "time_series", - "instant": true, - "legendFormat": "{{client_name}} \u2b82 {{server_name}}", - "range": false, - "refId": "A" - } - ], - "title": "Top Throughput Connections", - "type": "stat" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 40 - }, - "id": 11, - "title": "Network Flows (GoFlow2 / pfSense NetFlow)", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisLabel": "flows/s", - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "scheme", - "lineWidth": 2, - "pointSize": 5, - "showPoints": "never", - "spanNulls": true - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 41 - }, - "id": 12, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "multi" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "rate(goflow2_flow_process_nf_flowset_records_total{type=\"DataFlowSet\"}[5m])", - "legendFormat": "Flows/s ({{router}})", - "range": true, - "refId": "A" - } - ], - "title": "NetFlow Ingestion Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisLabel": "", - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "scheme", - "lineWidth": 2, - "pointSize": 5, - "showPoints": "never", - "spanNulls": true - }, - "unit": "Bps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 41 - }, - "id": 13, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "multi" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "rate(goflow2_flow_traffic_bytes_total[5m])", - "legendFormat": "Bytes/s from {{remote_ip}}", - "range": true, - "refId": "A" - } - ], - "title": "NetFlow Traffic Volume", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 10000 - }, - { - "color": "red", - "value": 100000 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 0, - "y": 49 - }, - "id": 14, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "goflow2_flow_process_nf_flowset_records_total{type=\"DataFlowSet\"}", - "legendFormat": "Total Flows", - "instant": true, - "refId": "A" - } - ], - "title": "Total Flows Processed", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 4, - "y": 49 - }, - "id": 15, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "goflow2_flow_process_nf_total", - "legendFormat": "Messages", - "instant": true, - "refId": "A" - } - ], - "title": "NetFlow Messages", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "value": null - } - ] - }, - "unit": "decbytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 8, - "y": 49 - }, - "id": 16, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "goflow2_flow_traffic_bytes_total", - "legendFormat": "Bytes", - "instant": true, - "refId": "A" - } - ], - "title": "Total NetFlow Bytes", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 2, - "pointSize": 5, - "showPoints": "never", - "spanNulls": true - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 12, - "x": 12, - "y": 49 - }, - "id": 17, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "multi" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "rate(goflow2_flow_process_nf_delay_seconds_sum[5m]) / rate(goflow2_flow_process_nf_delay_seconds_count[5m])", - "legendFormat": "Avg Delay", - "range": true, - "refId": "A" - } - ], - "title": "Flow Processing Delay", - "type": "timeseries" - } - ], - "refresh": "1h", - "schemaVersion": 38, - "style": "dark", - "tags": [ - "network", - "caretta", - "goflow2" - ], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "default", - "value": "default" - }, - "hide": 0, - "includeAll": false, - "label": "datasource", - "multi": false, - "name": "DS_PROMETHEUS", - "options": [], - "query": "prometheus", - "queryValue": "", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "allValue": "(.*)", - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "definition": "query_result(caretta_links_observed)", - "hide": 0, - "includeAll": true, - "multi": true, - "name": "namespace", - "options": [], - "query": { - "query": "query_result(caretta_links_observed)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "/.*_namespace=\"([^\"]*).*/", - "skipUrlSync": false, - "sort": 1, - "type": "query" - }, - { - "allValue": "(.*)", - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "definition": "query_result(caretta_links_observed)", - "hide": 0, - "includeAll": true, - "multi": true, - "name": "kind", - "options": [], - "query": { - "query": "query_result(caretta_links_observed)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "/.*_kind=\"([^\"]*).*/", - "skipUrlSync": false, - "sort": 0, - "type": "query" - }, - { - "allValue": "(.*)", - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "definition": "query_result(caretta_links_observed)", - "hide": 0, - "includeAll": true, - "label": "workload", - "multi": true, - "name": "workload", - "options": [], - "query": { - "query": "query_result(caretta_links_observed)", - "refId": "StandardVariableQuery" - }, - "refresh": 2, - "regex": "/.*_name=\"([^\"]*).*/", - "skipUrlSync": false, - "sort": 1, - "type": "query" - }, - { - "allValue": "(.*)", - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "definition": "label_values(server_port)", - "hide": 0, - "includeAll": true, - "label": "server port", - "multi": true, - "name": "port", - "options": [], - "query": { - "query": "label_values(server_port)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" - } - ] - }, - "time": { - "from": "now-15m", - "to": "now" - }, - "timepicker": {}, - "timezone": "", - "title": "Network Observability", - "uid": "network-observability", - "version": 2, - "weekStart": "" -} \ No newline at end of file diff --git a/stacks/platform/modules/monitoring/prometheus_chart_values.tpl b/stacks/platform/modules/monitoring/prometheus_chart_values.tpl index 970d943e..8d5a63a3 100755 --- a/stacks/platform/modules/monitoring/prometheus_chart_values.tpl +++ b/stacks/platform/modules/monitoring/prometheus_chart_values.tpl @@ -1232,11 +1232,6 @@ extraScrapeConfigs: | - source_labels: [__meta_kubernetes_pod_name] target_label: instance metrics_path: '/metrics' - - job_name: 'caretta' - static_configs: - - targets: - - "caretta-metrics.monitoring.svc.cluster.local:7117" - metrics_path: '/metrics' - job_name: 'goflow2' static_configs: - targets: