add backup IO logging, Pushgateway metrics, and Grafana dashboard
- Add /proc/self/io read/write tracking to vault raft-backup and etcd backup
- Push backup_duration_seconds, backup_read_bytes, backup_written_bytes, backup_last_success_timestamp to Pushgateway from all 6 backup CronJobs (etcd skipped — distroless image has no wget/curl)
- Add cloudsync_duration_seconds metric to cloudsync-monitor
- New "Backup Health" Grafana dashboard with 8 panels: time since last backup, overview table, duration/IO trends, cloud sync status, alerts, CronJob schedule
This commit is contained in:
parent
0b595751c5
commit
0a294a30a6
8 changed files with 530 additions and 8 deletions
|
|
@ -247,6 +247,10 @@ resource "kubernetes_cron_job_v1" "vaultwarden-backup" {
|
|||
command = ["/bin/sh", "-c", <<-EOT
|
||||
set -euxo pipefail
|
||||
apk add --no-cache sqlite
|
||||
# Baseline for the end-of-run stats: start time in epoch seconds, plus the
# read_bytes / write_bytes values from /proc/self/io (0 if awk fails).
# NOTE(review): /proc/self resolves to the process that opens it, which here
# is awk rather than this shell, so these counters likely reflect awk's own
# I/O and not the backup work. Confirm, and consider reading the shell's own
# /proc entry instead (the end-of-run samples must change in lock-step).
_t0=$(date +%s)
|
||||
_rb0=$(awk '/^read_bytes/{print $2}' /proc/self/io 2>/dev/null || echo 0)
|
||||
_wb0=$(awk '/^write_bytes/{print $2}' /proc/self/io 2>/dev/null || echo 0)
|
||||
|
||||
now=$(date +"%Y_%m_%d_%H_%M")
|
||||
# Pre-flight: verify source DB is healthy before backing up
|
||||
if ! sqlite3 /data/db.sqlite3 "PRAGMA integrity_check;" | grep -q "^ok$"; then
|
||||
|
|
@ -269,7 +273,21 @@ resource "kubernetes_cron_job_v1" "vaultwarden-backup" {
|
|||
cp -a /data/config.json /backup/$now/ 2>/dev/null || true
|
||||
# Rotate — 30 day retention
|
||||
find /backup -maxdepth 1 -mindepth 1 -type d -mtime +30 -exec rm -rf {} +
|
||||
echo "Backup complete: $now"
|
||||
|
||||
# --- Post-backup reporting: log duration / IO stats, then push metrics ---
# Wall-clock seconds elapsed since _t0 was captured at the start of the script.
_dur=$(($(date +%s) - _t0))
# Re-sample the counters captured earlier as _rb0/_wb0 (0 if awk fails).
# NOTE(review): /proc/self resolves to the process that opens it, which here
# is awk rather than this shell, so the deltas below likely do not reflect
# the backup's real I/O. Confirm before relying on the byte metrics.
_rb1=$(awk '/^read_bytes/{print $2}' /proc/self/io 2>/dev/null || echo 0)
_wb1=$(awk '/^write_bytes/{print $2}' /proc/self/io 2>/dev/null || echo 0)
echo "=== Backup IO Stats ==="
echo "duration: $${_dur}s"
echo "read: $(( (_rb1 - _rb0) / 1048576 )) MiB"
echo "written: $(( (_wb1 - _wb0) / 1048576 )) MiB"
# Single dollar signs on purpose: Terraform only un-doubles a dollar sign that
# sits in front of an opening brace, so a doubled one here would reach the
# shell as the PID variable (and awk as a nested field lookup), not the
# backup directory name / first column.
echo "output: $(du -sh "/backup/$now" | awk '{print $1}')"

# Best-effort metrics push: a Pushgateway failure must not fail the backup job.
wget -qO- --post-data "backup_duration_seconds $${_dur}
backup_read_bytes $(( _rb1 - _rb0 ))
backup_written_bytes $(( _wb1 - _wb0 ))
backup_last_success_timestamp $(date +%s)
" "http://prometheus-prometheus-pushgateway.monitoring:9091/metrics/job/vaultwarden-backup" || true
|
||||
EOT
|
||||
]
|
||||
volume_mount {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue