feat: add per-database backups for PostgreSQL and MySQL

Add separate CronJobs that dump each database individually:
- postgresql-backup-per-db: pg_dump -Fc per DB (daily 00:15)
- mysql-backup-per-db: mysqldump per DB (daily 00:45)

Dumps go to /backup/per-db/<dbname>/ on the same NFS PVC.
Enables single-database restore without affecting other databases.
Also fix CNPG superuser password sync and add --single-transaction and
--set-gtid-purged=OFF to the MySQL per-db dumps.

Updated restore runbooks with per-database restore procedures.

[ci skip]

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Viktor Barzin 2026-04-14 22:39:33 +00:00
parent ca1ae23f34
commit 0256ccdccc
3 changed files with 271 additions and 0 deletions


@@ -96,6 +96,38 @@ kubectl exec -it mysql-cluster-0 -n dbaas -c mysql -- mysqlsh root@localhost --p
kubectl exec -it mysql-cluster-0 -n dbaas -c mysql -- mysqlsh root@localhost --password="$ROOT_PWD" -- cluster rejoinInstance root@mysql-cluster-1:3306
```
## Restore Single Database (from per-db backup)
Per-database backups are stored at `/mnt/main/mysql-backup/per-db/<dbname>/` as gzipped SQL dumps.
### 1. List available per-db backups
```bash
ls -lt /mnt/main/mysql-backup/per-db/<dbname>/
```
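Optionally sanity-check a dump before restoring. A quick sketch (the file name is illustrative; use one from the listing above):
```bash
# Verify the gzip file is intact and peek at the first statements of the dump
gzip -t /mnt/main/mysql-backup/per-db/<dbname>/dump_YYYY_MM_DD_HH_MM.sql.gz
zcat /mnt/main/mysql-backup/per-db/<dbname>/dump_YYYY_MM_DD_HH_MM.sql.gz | head -n 40
```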
### 2. Restore a single database
```bash
# Port-forward to MySQL
kubectl port-forward svc/mysql -n dbaas 3307:3306 &
ROOT_PWD=$(kubectl get secret cluster-secret -n dbaas -o jsonpath='{.data.ROOT_PASSWORD}' | base64 -d)
# Restore single database (this replaces only the target database)
zcat /path/to/per-db/<dbname>/dump_YYYY_MM_DD_HH_MM.sql.gz | mysql -u root -p"$ROOT_PWD" --host 127.0.0.1 --port 3307 <dbname>
```
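Note that the per-database dumps are taken as `mysqldump <dbname>` (without `--databases`), so they contain no `CREATE DATABASE` statement. If the target database was dropped entirely, recreate it before piping in the dump; a minimal sketch:
```bash
# Recreate an empty database so the dump has somewhere to restore into
mysql -u root -p"$ROOT_PWD" --host 127.0.0.1 --port 3307 -e \
  "CREATE DATABASE IF NOT EXISTS <dbname>;"
```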
### 3. Verify
```bash
mysql -u root -p"$ROOT_PWD" --host 127.0.0.1 --port 3307 -e \
"SELECT TABLE_NAME, TABLE_ROWS FROM information_schema.TABLES WHERE TABLE_SCHEMA='<dbname>' ORDER BY TABLE_ROWS DESC LIMIT 10;"
```
### 4. Restart the affected service only
```bash
kubectl rollout restart deployment -n <namespace>
```
**Advantages over full restore**: Only the target database is affected. All other databases continue running with their current data.
## Alternative: Restore from sda Backup
If TrueNAS NFS is unavailable but the PVE host is accessible:


@@ -84,6 +84,43 @@ kubectl rollout restart deployment -n linkwarden
# ... repeat for all PG-dependent services (excluding trading — disabled)
```
## Restore Single Database (from per-db backup)
Per-database backups use `pg_dump -Fc` (custom format) and are stored at `/mnt/main/postgresql-backup/per-db/<dbname>/`.
### 1. List available per-db backups
```bash
ls -lt /mnt/main/postgresql-backup/per-db/<dbname>/
# Or via a pod:
kubectl exec -n dbaas pg-cluster-1 -c postgres -- ls -lt /backup/per-db/<dbname>/ 2>/dev/null || \
echo "Mount a backup pod — see Option A below"
```
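Since these are `pg_dump -Fc` custom-format archives, their contents can be inspected without restoring anything (a quick sketch; the file name is illustrative):
```bash
# Print the archive's table of contents (schemas, tables, sequences, ...)
pg_restore --list /path/to/per-db/<dbname>/dump_YYYY_MM_DD_HH_MM.dump | head -n 40
```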
### 2. Restore a single database
```bash
# Port-forward to the CNPG primary
kubectl port-forward svc/pg-cluster-rw -n dbaas 5433:5432 &
# Restore single database (drops and recreates objects in that DB only)
export PGPASSWORD=$(kubectl get secret pg-cluster-superuser -n dbaas -o jsonpath='{.data.password}' | base64 -d)
pg_restore -h 127.0.0.1 -p 5433 -U postgres -d <dbname> --clean --if-exists \
/path/to/per-db/<dbname>/dump_YYYY_MM_DD_HH_MM.dump
```
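If overwriting the live database in place feels risky, one variation (not part of the runbook; the `<dbname>_restore_check` name is illustrative) is to restore into a throwaway database first and inspect it:
```bash
# Restore into a scratch database for inspection, then drop it when satisfied
createdb -h 127.0.0.1 -p 5433 -U postgres <dbname>_restore_check
pg_restore -h 127.0.0.1 -p 5433 -U postgres -d <dbname>_restore_check \
  /path/to/per-db/<dbname>/dump_YYYY_MM_DD_HH_MM.dump
# (add --no-owner if role ownership errors appear)
dropdb -h 127.0.0.1 -p 5433 -U postgres <dbname>_restore_check
```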
### 3. Verify
```bash
psql -h 127.0.0.1 -p 5433 -U postgres -d <dbname> -c \
"SELECT schemaname, relname, n_live_tup FROM pg_stat_user_tables ORDER BY n_live_tup DESC LIMIT 10;"
```
### 4. Restart the affected service only
```bash
kubectl rollout restart deployment -n <namespace>
```
**Advantages over full restore**: Only the target database is affected. All other databases continue running with their current data.
## Alternative: Restore from sda Backup
If TrueNAS NFS is unavailable but the PVE host is accessible:


@@ -516,6 +516,101 @@ resource "kubernetes_cron_job_v1" "mysql-backup" {
}
}
# Per-database MySQL backups (enables single-database restore without affecting others)
resource "kubernetes_cron_job_v1" "mysql-backup-per-db" {
metadata {
name = "mysql-backup-per-db"
namespace = kubernetes_namespace.dbaas.metadata[0].name
}
spec {
concurrency_policy = "Replace"
failed_jobs_history_limit = 3
schedule = "45 0 * * *"
starting_deadline_seconds = 10
successful_jobs_history_limit = 3
job_template {
metadata {}
spec {
backoff_limit = 3
ttl_seconds_after_finished = 10
template {
metadata {}
spec {
container {
name = "mysql-backup-per-db"
image = "docker.io/library/mysql:8.0"
env {
name = "MYSQL_PWD"
value_from {
secret_key_ref {
name = "cluster-secret"
key = "ROOT_PASSWORD"
}
}
}
command = ["/bin/bash", "-c", <<-EOT
set -euo pipefail
_t0=$(date +%s)
now=$(date +"%Y_%m_%d_%H_%M")
MYSQL_HOST=mysql.dbaas.svc.cluster.local
failed=0
total=0
ok=0
# Discover all user databases
dbs=$(mysql -u root --host $MYSQL_HOST -N -e \
"SELECT schema_name FROM information_schema.schemata WHERE schema_name NOT IN ('mysql','information_schema','performance_schema','sys','mysql_innodb_cluster_metadata');")
for db in $dbs; do
total=$((total + 1))
mkdir -p /backup/per-db/$db
echo "=== Backing up $db ==="
if mysqldump -u root --host $MYSQL_HOST --single-transaction --set-gtid-purged=OFF "$db" | gzip -9 > "/backup/per-db/$db/dump_$now.sql.gz"; then
_size=$(stat -c%s "/backup/per-db/$db/dump_$now.sql.gz")
echo " OK — $(( _size / 1024 )) KiB"
ok=$((ok + 1))
else
echo " FAILED"
rm -f "/backup/per-db/$db/dump_$now.sql.gz"
failed=$((failed + 1))
fi
done
# Rotation: delete per-database dumps older than 14 days
find /backup/per-db -name "dump_*.sql.gz" -type f -mtime +14 -delete
_dur=$(($(date +%s) - _t0))
echo "=== Per-DB Backup Summary ==="
echo "databases: $total (ok: $ok, failed: $failed)"
echo "duration: $${_dur}s"
curl -sf --data-binary @- "http://prometheus-prometheus-pushgateway.monitoring:9091/metrics/job/mysql-backup-per-db" <<PGEOF || true
backup_duration_seconds $${_dur}
backup_databases_total $total
backup_databases_ok $ok
backup_databases_failed $failed
backup_last_success_timestamp $(date +%s)
PGEOF
EOT
]
volume_mount {
name = "mysql-backup"
mount_path = "/backup"
}
}
volume {
name = "mysql-backup"
persistent_volume_claim {
claim_name = module.nfs_mysql_backup_host.claim_name
}
}
}
}
}
}
}
}
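# The metrics pushed above (backup_last_success_timestamp, backup_databases_failed, ...)
# can back a staleness/failure alert; roughly something like the PromQL below,
# though the exact labels depend on how Prometheus scrapes the pushgateway:
#   time() - backup_last_success_timestamp{job="mysql-backup-per-db"} > 2 * 86400
#   backup_databases_failed{job="mysql-backup-per-db"} > 0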
# resource "kubernetes_persistent_volume" "mysql" {
# metadata {
# name = "mysql-pv"
@@ -1283,3 +1378,110 @@ resource "kubernetes_cron_job_v1" "postgresql-backup" {
}
}
}
# Per-database PostgreSQL backups (enables single-database restore without affecting others)
resource "kubernetes_cron_job_v1" "postgresql-backup-per-db" {
metadata {
name = "postgresql-backup-per-db"
namespace = kubernetes_namespace.dbaas.metadata[0].name
}
spec {
concurrency_policy = "Replace"
failed_jobs_history_limit = 3
schedule = "15 0 * * *"
starting_deadline_seconds = 10
successful_jobs_history_limit = 3
job_template {
metadata {}
spec {
backoff_limit = 3
ttl_seconds_after_finished = 10
template {
metadata {}
spec {
container {
name = "postgresql-backup-per-db"
image = "docker.io/library/postgres:16.4-bullseye"
env {
name = "PGPASSWORD"
value_from {
secret_key_ref {
name = "pg-cluster-superuser"
key = "password"
}
}
}
command = ["/bin/bash", "-c", <<-EOT
set -euo pipefail
apt-get update -qq && apt-get install -yqq curl >/dev/null 2>&1 || true
_t0=$(date +%s)
now=$(date +"%Y_%m_%d_%H_%M")
PGHOST=pg-cluster-rw.dbaas
PGUSER=postgres
failed=0
total=0
ok=0
# Discover all user databases
dbs=$(PGPASSWORD=$PGPASSWORD psql -h $PGHOST -U $PGUSER -t -A -c \
"SELECT datname FROM pg_database WHERE datistemplate = false AND datname != 'postgres' ORDER BY datname;")
for db in $dbs; do
total=$((total + 1))
mkdir -p /backup/per-db/$db
echo "=== Backing up $db ==="
if PGPASSWORD=$PGPASSWORD pg_dump -Fc -h $PGHOST -U $PGUSER "$db" > "/backup/per-db/$db/dump_$now.dump"; then
_size=$(stat -c%s "/backup/per-db/$db/dump_$now.dump")
echo " OK — $(( _size / 1024 )) KiB"
ok=$((ok + 1))
else
echo " FAILED"
rm -f "/backup/per-db/$db/dump_$now.dump"
failed=$((failed + 1))
fi
done
# Rotation: delete per-database dumps older than 14 days
find /backup/per-db -name "dump_*.dump" -type f -mtime +14 -delete
_dur=$(($(date +%s) - _t0))
echo "=== Per-DB Backup Summary ==="
echo "databases: $total (ok: $ok, failed: $failed)"
echo "duration: $${_dur}s"
curl -sf --data-binary @- "http://prometheus-prometheus-pushgateway.monitoring:9091/metrics/job/postgresql-backup-per-db" <<PGEOF || true
backup_duration_seconds $${_dur}
backup_databases_total $total
backup_databases_ok $ok
backup_databases_failed $failed
backup_last_success_timestamp $(date +%s)
PGEOF
EOT
]
volume_mount {
name = "postgresql-backup"
mount_path = "/backup"
}
resources {
requests = {
memory = "256Mi"
cpu = "50m"
}
limits = {
memory = "512Mi"
}
}
}
volume {
name = "postgresql-backup"
persistent_volume_claim {
claim_name = module.nfs_postgresql_backup_host.claim_name
}
}
}
}
}
}
}
}
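# The new per-db CronJobs can be exercised without waiting for their schedules by
# creating one-off Jobs from them (a sketch, assuming kubectl access to the dbaas namespace):
#   kubectl create job --from=cronjob/postgresql-backup-per-db pg-per-db-manual -n dbaas
#   kubectl create job --from=cronjob/mysql-backup-per-db mysql-per-db-manual -n dbaas
#   kubectl logs -f job/pg-per-db-manual -n dbaas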