diff --git a/stacks/platform/modules/monitoring/caretta.tf b/stacks/platform/modules/monitoring/caretta.tf index 11028a9b..4479baa2 100644 --- a/stacks/platform/modules/monitoring/caretta.tf +++ b/stacks/platform/modules/monitoring/caretta.tf @@ -18,6 +18,27 @@ resource "helm_release" "caretta" { } } +resource "kubernetes_service" "caretta_metrics" { + metadata { + name = "caretta-metrics" + namespace = kubernetes_namespace.monitoring.metadata[0].name + labels = { + app = "caretta" + } + } + spec { + selector = { + app = "caretta" + } + port { + name = "metrics" + port = 7117 + target_port = 7117 + protocol = "TCP" + } + } +} + resource "kubernetes_config_map" "caretta_grafana_dashboard" { metadata { name = "caretta-grafana-dashboard" diff --git a/stacks/platform/modules/monitoring/dashboards/caretta-dashboard.json b/stacks/platform/modules/monitoring/dashboards/caretta-dashboard.json new file mode 100644 index 00000000..25d6bb90 --- /dev/null +++ b/stacks/platform/modules/monitoring/dashboards/caretta-dashboard.json @@ -0,0 +1,487 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 15, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "gridPos": { + "h": 24, + "w": 17, + "x": 0, + "y": 0 + }, + "id": 2, + "interval": "15s", + "options": { + "nodes": { + "arcs": [ + { + "color": "#5794F2", + "field": "arc__color" + } + ] + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "increase((sum by (id, title, subTitle, detail__kind, arc__color) (label_replace((label_replace(label_replace(label_replace(label_replace((caretta_links_observed{client_namespace=~\"$namespace\", client_kind=~\"$kind\", client_name=~\"$workload\", server_port=~\"$port\"} or caretta_links_observed{server_namespace=~\"$namespace\", server_kind=~\"$kind\", server_name=~\"$workload\", server_port=~\"$port\"}), \"detail__kind\", \"$1\", \"server_kind\", \"(.*)\"), \"subTitle\", \"$1\", \"server_namespace\", \"(.*)\"), \"title\", \"$1\", \"server_name\", \"(.*)\"), \"id\", \"$1\", \"server_id\", \"(.*)\") or label_replace(label_replace(label_replace(label_replace((caretta_links_observed{client_namespace=~\"$namespace\", client_kind=~\"$kind\", client_name=~\"$workload\", server_port=~\"$port\"} or caretta_links_observed{server_namespace=~\"$namespace\", server_kind=~\"$kind\", server_name=~\"$workload\", server_port=~\"$port\"}), \"detail__kind\", \"$1\", \"client_kind\", \"(.*)\"), \"subTitle\", \"$1\", \"client_namespace\", \"(.*)\"), \"title\", \"$1\", \"client_name\", \"(.*)\"), \"id\", \"$1\", \"client_id\", \"(.*)\") ), \"arc__color\", \"1\", \"link_id\", \"(.*)\")) )[$__range:$__interval]) > 0", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "nodes" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "increase((sum by (id, source, target, mainStat) ((label_replace(label_replace(label_replace(label_replace((caretta_links_observed{client_namespace=~\"$namespace\", client_kind=~\"$kind\", client_name=~\"$workload\", server_port=~\"$port\"} or caretta_links_observed{server_namespace=~\"$namespace\", server_kind=~\"$kind\", server_name=~\"$workload\", server_port=~\"$port\"}), \"id\", \"$1\", \"link_id\", \"(.*)\"), \"source\", \"$1\", \"client_id\", \"(.*)\"), \"target\", \"$1\", \"server_id\", \"(.*)\"), \"mainStat\", \"$1\", \"server_port\", \"(.*)\"))) )[$__range:$__interval]) > 0", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "edges" + } + ], + "title": "Service Map ☸️", + "type": "nodeGraph" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "blue", + "mode": "fixed" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "links": [], + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 17, + "y": 0 + }, + "id": 4, + "options": { + "displayLabels": [ + "name" + ], + "legend": { + "displayMode": "list", + "placement": "right", + "showLegend": false + }, + "pieType": "donut", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (server_port) (increase((caretta_links_observed{client_namespace=~\"$namespace\", client_kind=~\"$kind\", client_name=~\"$workload\", server_port=~\"$port\"} or caretta_links_observed{server_namespace=~\"$namespace\", server_kind=~\"$kind\", server_name=~\"$workload\", server_port=~\"$port\"})[$__range:$__interval])) > 0", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Active Ports", + "type": "piechart" + }, + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "gridPos": { + "h": 7, + "w": 3, + "x": 21, + "y": 0 + }, + "id": 10, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "\n \n
\n
\n

\n \"caretta\"\n

by groundcover

\n\n \n [![slack](https://img.shields.io/badge/slack-groundcover-yellowgreen.svg?logo=slack)](http://www.groundcover.com/join-slack)\n \n
\n

\n\n
\n", + "mode": "markdown" + }, + "pluginVersion": "10.1.2", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "purple", + "mode": "continuous-blues" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 7, + "x": 17, + "y": 7 + }, + "id": 8, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "valueMode": "color" + }, + "pluginVersion": "10.1.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "topk(8, sum by (client_name) ((rate(caretta_links_observed{client_namespace=~\"$namespace\", client_kind=~\"$kind\", client_name=~\"$workload\", server_port=~\"$port\"}[$__range:$__interval]))))", + "format": "time_series", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Top Throughput Workloads", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-blues" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 7, + "x": 17, + "y": 15 + }, + "id": 6, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "center", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "10.1.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "topk(7, sum by (client_name, server_name) ( rate( (caretta_links_observed{client_namespace=~\"$namespace\", client_kind=~\"$kind\", client_name=~\"$workload\", server_port=~\"$port\", client_kind!~\"(node|external)\",} or caretta_links_observed{server_namespace=~\"$namespace\", server_kind=~\"$kind\", server_name=~\"$workload\", server_port=~\"$port\", server_kind!~\"(node|external)\"})[$__range:$__interval]) ) )", + "format": "time_series", + "instant": true, + "legendFormat": "{{client_name}} ⮂ {{server_name}}", + "range": false, + "refId": "A" + } + ], + "title": "Top Throughput Connections", + "type": "stat" + } + ], + "refresh": "", + "schemaVersion": 38, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "default", + "value": "default" + }, + "hide": 0, + "includeAll": false, + "label": "datasource", + "multi": false, + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": "(.*)", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "query_result(caretta_links_observed)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "namespace", + "options": [], + "query": { + "query": "query_result(caretta_links_observed)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "/.*_namespace=\"([^\"]*).*/", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "allValue": "(.*)", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "query_result(caretta_links_observed)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "kind", + "options": [], + "query": { + "query": "query_result(caretta_links_observed)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "/.*_kind=\"([^\"]*).*/", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": "(.*)", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "query_result(caretta_links_observed)", + "hide": 0, + "includeAll": true, + "label": "workload", + "multi": true, + "name": "workload", + "options": [], + "query": { + "query": "query_result(caretta_links_observed)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "/.*_name=\"([^\"]*).*/", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "allValue": "(.*)", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(server_port)", + "hide": 0, + "includeAll": true, + "label": "server port", + "multi": true, + "name": "port", + "options": [], + "query": { + "query": "label_values(server_port)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Caretta Dashboard", + "uid": "k0Om62pVf", + "version": 2, + "weekStart": "" +} \ No newline at end of file diff --git a/stacks/platform/modules/monitoring/prometheus_chart_values.tpl b/stacks/platform/modules/monitoring/prometheus_chart_values.tpl index f6deaad1..ace0dcff 100755 --- a/stacks/platform/modules/monitoring/prometheus_chart_values.tpl +++ b/stacks/platform/modules/monitoring/prometheus_chart_values.tpl @@ -879,4 +879,14 @@ extraScrapeConfigs: | - source_labels: [__meta_kubernetes_pod_name] target_label: instance metrics_path: '/metrics' + - job_name: 'caretta' + static_configs: + - targets: + - "caretta-metrics.monitoring.svc.cluster.local:7117" + metrics_path: '/metrics' + - job_name: 'goflow2' + static_configs: + - targets: + - "goflow2.monitoring.svc.cluster.local:8080" + metrics_path: '/metrics'