diff --git a/docs/runbooks/restore-mysql.md b/docs/runbooks/restore-mysql.md index 145f7f5d..353cb68d 100644 --- a/docs/runbooks/restore-mysql.md +++ b/docs/runbooks/restore-mysql.md @@ -96,6 +96,38 @@ kubectl exec -it mysql-cluster-0 -n dbaas -c mysql -- mysqlsh root@localhost --p kubectl exec -it mysql-cluster-0 -n dbaas -c mysql -- mysqlsh root@localhost --password="$ROOT_PWD" -- cluster rejoinInstance root@mysql-cluster-1:3306 ``` +## Restore Single Database (from per-db backup) + +Per-database backups are stored at `/mnt/main/mysql-backup/per-db/<database>/` as gzipped SQL dumps. + +### 1. List available per-db backups +```bash +ls -lt /mnt/main/mysql-backup/per-db/<database>/ +``` + +### 2. Restore a single database +```bash +# Port-forward to MySQL +kubectl port-forward svc/mysql -n dbaas 3307:3306 & +ROOT_PWD=$(kubectl get secret cluster-secret -n dbaas -o jsonpath='{.data.ROOT_PASSWORD}' | base64 -d) + +# Restore single database (this replaces only the target database) +zcat /path/to/per-db/<database>/dump_YYYY_MM_DD_HH_MM.sql.gz | mysql -u root -p"$ROOT_PWD" --host 127.0.0.1 --port 3307 +``` + +### 3. Verify +```bash +mysql -u root -p"$ROOT_PWD" --host 127.0.0.1 --port 3307 -e \ + "SELECT TABLE_NAME, TABLE_ROWS FROM information_schema.TABLES WHERE TABLE_SCHEMA='<database>' ORDER BY TABLE_ROWS DESC LIMIT 10;" +``` + +### 4. Restart the affected service only +```bash +kubectl rollout restart deployment <service> -n <namespace> +``` + +**Advantages over full restore**: Only the target database is affected. All other databases continue running with their current data. + ## Alternative: Restore from sda Backup If TrueNAS NFS is unavailable but the PVE host is accessible: diff --git a/docs/runbooks/restore-postgresql.md b/docs/runbooks/restore-postgresql.md index 387f620c..f656fefc 100644 --- a/docs/runbooks/restore-postgresql.md +++ b/docs/runbooks/restore-postgresql.md @@ -84,6 +84,43 @@ kubectl rollout restart deployment -n linkwarden # ...
repeat for all PG-dependent services (excluding trading — disabled) ``` +## Restore Single Database (from per-db backup) + +Per-database backups use `pg_dump -Fc` (custom format) and are stored at `/mnt/main/postgresql-backup/per-db/<database>/`. + +### 1. List available per-db backups +```bash +ls -lt /mnt/main/postgresql-backup/per-db/<database>/ + +# Or via a pod: +kubectl exec -n dbaas pg-cluster-1 -c postgres -- ls -lt /backup/per-db/<database>/ 2>/dev/null || \ + echo "Mount a backup pod — see Option A below" +``` + +### 2. Restore a single database +```bash +# Port-forward to the CNPG primary +kubectl port-forward svc/pg-cluster-rw -n dbaas 5433:5432 & + +# Restore single database (drops and recreates objects in that DB only) +export PGPASSWORD=$(kubectl get secret pg-cluster-superuser -n dbaas -o jsonpath='{.data.password}' | base64 -d) +pg_restore -h 127.0.0.1 -p 5433 -U postgres -d <database> --clean --if-exists \ + /path/to/per-db/<database>/dump_YYYY_MM_DD_HH_MM.dump +``` + +### 3. Verify +```bash +PGPASSWORD=$PGPASSWORD psql -h 127.0.0.1 -p 5433 -U postgres -d <database> -c \ + "SELECT schemaname, tablename, n_live_tup FROM pg_stat_user_tables ORDER BY n_live_tup DESC LIMIT 10;" +``` + +### 4. Restart the affected service only +```bash +kubectl rollout restart deployment <service> -n <namespace> +``` + +**Advantages over full restore**: Only the target database is affected. All other databases continue running with their current data.
+ ## Alternative: Restore from sda Backup If TrueNAS NFS is unavailable but the PVE host is accessible: diff --git a/stacks/dbaas/modules/dbaas/main.tf b/stacks/dbaas/modules/dbaas/main.tf index 7b4035e6..16c32265 100644 --- a/stacks/dbaas/modules/dbaas/main.tf +++ b/stacks/dbaas/modules/dbaas/main.tf @@ -516,6 +516,101 @@ resource "kubernetes_cron_job_v1" "mysql-backup" { } } +# Per-database MySQL backups (enables single-database restore without affecting others) +resource "kubernetes_cron_job_v1" "mysql-backup-per-db" { + metadata { + name = "mysql-backup-per-db" + namespace = kubernetes_namespace.dbaas.metadata[0].name + } + spec { + concurrency_policy = "Replace" + failed_jobs_history_limit = 3 + schedule = "45 0 * * *" + starting_deadline_seconds = 10 + successful_jobs_history_limit = 3 + job_template { + metadata {} + spec { + backoff_limit = 3 + ttl_seconds_after_finished = 10 + template { + metadata {} + spec { + container { + name = "mysql-backup-per-db" + image = "docker.io/library/mysql:8.0" + env { + name = "MYSQL_PWD" + value_from { + secret_key_ref { + name = "cluster-secret" + key = "ROOT_PASSWORD" + } + } + } + command = ["/bin/bash", "-c", <<-EOT + set -euo pipefail + _t0=$(date +%s) + now=$(date +"%Y_%m_%d_%H_%M") + MYSQL_HOST=mysql.dbaas.svc.cluster.local + failed=0 + total=0 + ok=0 + + # Discover all user databases + dbs=$(mysql -u root --host $MYSQL_HOST -N -e \ + "SELECT schema_name FROM information_schema.schemata WHERE schema_name NOT IN ('mysql','information_schema','performance_schema','sys','mysql_innodb_cluster_metadata');") + + for db in $dbs; do + total=$((total + 1)) + mkdir -p /backup/per-db/$db + echo "=== Backing up $db ===" + if mysqldump -u root --host $MYSQL_HOST --single-transaction --set-gtid-purged=OFF "$db" | gzip -9 > "/backup/per-db/$db/dump_$now.sql.gz"; then + _size=$(stat -c%s "/backup/per-db/$db/dump_$now.sql.gz") + echo " OK — $(( _size / 1024 )) KiB" + ok=$((ok + 1)) + else + echo " FAILED" + rm -f 
"/backup/per-db/$db/dump_$now.sql.gz" + failed=$((failed + 1)) + fi + done + + # Rotate — 14 day retention per database + find /backup/per-db -name "dump_*.sql.gz" -type f -mtime +14 -delete + + _dur=$(($(date +%s) - _t0)) + echo "=== Per-DB Backup Summary ===" + echo "databases: $total (ok: $ok, failed: $failed)" + echo "duration: $${_dur}s" + + curl -sf --data-binary @- "http://prometheus-prometheus-pushgateway.monitoring:9091/metrics/job/mysql-backup-per-db" </dev/null 2>&1 || true + + _t0=$(date +%s) + now=$(date +"%Y_%m_%d_%H_%M") + PGHOST=pg-cluster-rw.dbaas + PGUSER=postgres + failed=0 + total=0 + ok=0 + + # Discover all user databases + dbs=$(PGPASSWORD=$PGPASSWORD psql -h $PGHOST -U $PGUSER -t -A -c \ + "SELECT datname FROM pg_database WHERE datistemplate = false AND datname != 'postgres' ORDER BY datname;") + + for db in $dbs; do + total=$((total + 1)) + mkdir -p /backup/per-db/$db + echo "=== Backing up $db ===" + if PGPASSWORD=$PGPASSWORD pg_dump -Fc -h $PGHOST -U $PGUSER "$db" > "/backup/per-db/$db/dump_$now.dump"; then + _size=$(stat -c%s "/backup/per-db/$db/dump_$now.dump") + echo " OK — $(( _size / 1024 )) KiB" + ok=$((ok + 1)) + else + echo " FAILED" + rm -f "/backup/per-db/$db/dump_$now.dump" + failed=$((failed + 1)) + fi + done + + # Rotate — 14 day retention per database + find /backup/per-db -name "dump_*.dump" -type f -mtime +14 -delete + + _dur=$(($(date +%s) - _t0)) + echo "=== Per-DB Backup Summary ===" + echo "databases: $total (ok: $ok, failed: $failed)" + echo "duration: $${_dur}s" + + curl -sf --data-binary @- "http://prometheus-prometheus-pushgateway.monitoring:9091/metrics/job/postgresql-backup-per-db" <