Add backup_output_bytes and cloudsync_transferred_bytes metrics to the backup dashboard
- All 7 backup CronJobs now push backup_output_bytes (file size after backup)
- Cloud Sync monitor parses rclone transfer stats into cloudsync_transferred_bytes
- Grafana dashboard: new Output (MiB) table column, Output Size Trend panel, Write Throughput panel, Cloud Sync Transfer Volume bargauge
- All timeseries panels use points-only draw style (discrete backup snapshots)
- etcd backup restructured: init_container for etcdctl (distroless image), busybox sidecar for metrics push + purge, ClusterFirstWithHostNet DNS
- Fixed pre-existing issue: curl was missing in postgres:16.4-bullseye (immich, dbaas PG)
- Fixed use of grep -oP, which is not available in alpine/busybox (cloud sync monitor)
This commit is contained in:
parent
f289f76882
commit
d20c5e5535
8 changed files with 186 additions and 46 deletions
|
|
@ -84,6 +84,10 @@
|
|||
"matcher": { "id": "byName", "options": "Written (MiB)" },
|
||||
"properties": [{ "id": "unit", "value": "decmbytes" }]
|
||||
},
|
||||
{
|
||||
"matcher": { "id": "byName", "options": "Output (MiB)" },
|
||||
"properties": [{ "id": "unit", "value": "decmbytes" }]
|
||||
},
|
||||
{
|
||||
"matcher": { "id": "byName", "options": "Last Success" },
|
||||
"properties": [{ "id": "unit", "value": "dateTimeFromNow" }]
|
||||
|
|
@ -106,6 +110,7 @@
|
|||
"Value #Duration": "Duration (s)",
|
||||
"Value #Read": "Read (MiB)",
|
||||
"Value #Written": "Written (MiB)",
|
||||
"Value #Output": "Output (MiB)",
|
||||
"Value #LastSuccess": "Last Success",
|
||||
"job": "Backup"
|
||||
},
|
||||
|
|
@ -139,6 +144,13 @@
|
|||
"instant": true,
|
||||
"format": "table"
|
||||
},
|
||||
{
|
||||
"expr": "backup_output_bytes / 1048576",
|
||||
"legendFormat": "{{ job }}",
|
||||
"refId": "Output",
|
||||
"instant": true,
|
||||
"format": "table"
|
||||
},
|
||||
{
|
||||
"expr": "backup_last_success_timestamp * 1000",
|
||||
"legendFormat": "{{ job }}",
|
||||
|
|
@ -157,11 +169,9 @@
|
|||
"defaults": {
|
||||
"unit": "s",
|
||||
"custom": {
|
||||
"drawStyle": "line",
|
||||
"lineWidth": 2,
|
||||
"fillOpacity": 10,
|
||||
"pointSize": 5,
|
||||
"showPoints": "auto"
|
||||
"drawStyle": "points",
|
||||
"pointSize": 8,
|
||||
"showPoints": "always"
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
|
|
@ -187,11 +197,9 @@
|
|||
"defaults": {
|
||||
"unit": "bytes",
|
||||
"custom": {
|
||||
"drawStyle": "line",
|
||||
"lineWidth": 2,
|
||||
"fillOpacity": 10,
|
||||
"pointSize": 5,
|
||||
"showPoints": "auto"
|
||||
"drawStyle": "points",
|
||||
"pointSize": 8,
|
||||
"showPoints": "always"
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
|
|
@ -213,10 +221,68 @@
|
|||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Backup Output Size Trend",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 22 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes",
|
||||
"custom": {
|
||||
"drawStyle": "points",
|
||||
"pointSize": 8,
|
||||
"showPoints": "always"
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"tooltip": { "mode": "multi", "sort": "desc" },
|
||||
"legend": { "displayMode": "table", "placement": "bottom", "calcs": ["lastNotNull", "max"] }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "backup_output_bytes",
|
||||
"legendFormat": "{{ job }}",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Write Throughput",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 22 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "Bps",
|
||||
"custom": {
|
||||
"drawStyle": "bars",
|
||||
"lineWidth": 1,
|
||||
"fillOpacity": 50,
|
||||
"pointSize": 5,
|
||||
"showPoints": "never"
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"tooltip": { "mode": "multi", "sort": "desc" },
|
||||
"legend": { "displayMode": "table", "placement": "bottom", "calcs": ["lastNotNull", "max"] }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "backup_written_bytes / backup_duration_seconds",
|
||||
"legendFormat": "{{ job }}",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Cloud Sync Status",
|
||||
"type": "stat",
|
||||
"gridPos": { "h": 6, "w": 12, "x": 0, "y": 22 },
|
||||
"gridPos": { "h": 6, "w": 12, "x": 0, "y": 30 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
|
|
@ -250,7 +316,7 @@
|
|||
{
|
||||
"title": "Cloud Sync Duration",
|
||||
"type": "stat",
|
||||
"gridPos": { "h": 6, "w": 12, "x": 12, "y": 22 },
|
||||
"gridPos": { "h": 6, "w": 12, "x": 12, "y": 30 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
|
|
@ -280,10 +346,43 @@
|
|||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Cloud Sync Transfer Volume",
|
||||
"type": "bargauge",
|
||||
"gridPos": { "h": 6, "w": 24, "x": 0, "y": 36 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes",
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 1073741824 },
|
||||
{ "color": "red", "value": 10737418240 }
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
||||
"displayMode": "gradient",
|
||||
"orientation": "horizontal",
|
||||
"showUnfilled": true
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "cloudsync_transferred_bytes",
|
||||
"legendFormat": "Task {{ task_id }}",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Active Backup Alerts",
|
||||
"type": "alertlist",
|
||||
"gridPos": { "h": 6, "w": 24, "x": 0, "y": 28 },
|
||||
"gridPos": { "h": 6, "w": 24, "x": 0, "y": 42 },
|
||||
"datasource": { "type": "datasource", "uid": "grafana" },
|
||||
"options": {
|
||||
"showOptions": "current",
|
||||
|
|
@ -305,7 +404,7 @@
|
|||
{
|
||||
"title": "CronJob Last Schedule",
|
||||
"type": "table",
|
||||
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 34 },
|
||||
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 48 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"fieldConfig": {
|
||||
"defaults": {},
|
||||
|
|
|
|||
|
|
@ -146,7 +146,15 @@ resource "kubernetes_cron_job_v1" "cloudsync_monitor" {
|
|||
|
||||
# Extract transfer stats from job progress description (rclone output)
|
||||
JOB_PROGRESS=$(echo "$task" | jq -r '.job.progress.description // ""')
|
||||
BYTES_TX=$(echo "$JOB_PROGRESS" | grep -oP 'Transferred:\s+[\d.]+ \w+' | head -1 | awk '{print $2}' || echo 0)
|
||||
TX_NUM=$(echo "$JOB_PROGRESS" | sed -n 's/.*Transferred:[[:space:]]*\([0-9.]*\).*/\1/p' | head -1)
|
||||
TX_NUM=$${TX_NUM:-0}
|
||||
TX_UNIT=$(echo "$JOB_PROGRESS" | sed -n 's/.*Transferred:[[:space:]]*[0-9.]*[[:space:]]*\([A-Za-z]*\).*/\1/p' | head -1)
|
||||
TX_UNIT=$${TX_UNIT:-Bytes}
|
||||
case "$TX_UNIT" in
|
||||
Bytes|B) TX_MULT=1 ;; KiB|kB) TX_MULT=1024 ;; MiB|MB) TX_MULT=1048576 ;;
|
||||
GiB|GB) TX_MULT=1073741824 ;; *) TX_MULT=1 ;;
|
||||
esac
|
||||
TRANSFERRED_BYTES=$(echo "$TX_NUM $TX_MULT" | awk '{printf "%.0f", $1 * $2}')
|
||||
JOB_STARTED=$(echo "$task" | jq -r '.job.time_started."$date" // 0')
|
||||
JOB_FINISHED=$(echo "$task" | jq -r '.job.time_finished."$date" // 0')
|
||||
if [ "$JOB_STARTED" != "0" ] && [ "$JOB_STARTED" != "null" ] && [ "$JOB_FINISHED" != "0" ] && [ "$JOB_FINISHED" != "null" ]; then
|
||||
|
|
@ -168,6 +176,9 @@ resource "kubernetes_cron_job_v1" "cloudsync_monitor" {
|
|||
# HELP cloudsync_duration_seconds Duration of the last Cloud Sync run
|
||||
# TYPE cloudsync_duration_seconds gauge
|
||||
cloudsync_duration_seconds $SYNC_DURATION
|
||||
# HELP cloudsync_transferred_bytes Bytes transferred during Cloud Sync run
|
||||
# TYPE cloudsync_transferred_bytes gauge
|
||||
cloudsync_transferred_bytes $TRANSFERRED_BYTES
|
||||
METRICS
|
||||
done
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue