feat: add per-database backups for PostgreSQL and MySQL
Add separate CronJobs that dump each database individually: - postgresql-backup-per-db: pg_dump -Fc per DB (daily 00:15) - mysql-backup-per-db: mysqldump per DB (daily 00:45) Dumps go to /backup/per-db/<dbname>/ on the same NFS PVC. Enables single-database restore without affecting other databases. Also fixed CNPG superuser password sync and added --single-transaction --set-gtid-purged=OFF to MySQL per-db dumps. Updated restore runbooks with per-database restore procedures. [ci skip] Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
ca1ae23f34
commit
0256ccdccc
3 changed files with 271 additions and 0 deletions
|
|
@ -96,6 +96,38 @@ kubectl exec -it mysql-cluster-0 -n dbaas -c mysql -- mysqlsh root@localhost --p
|
|||
kubectl exec -it mysql-cluster-0 -n dbaas -c mysql -- mysqlsh root@localhost --password="$ROOT_PWD" -- cluster rejoinInstance root@mysql-cluster-1:3306
|
||||
```
|
||||
|
||||
## Restore Single Database (from per-db backup)
|
||||
|
||||
Per-database backups are stored at `/mnt/main/mysql-backup/per-db/<dbname>/` as gzipped SQL dumps.
|
||||
|
||||
### 1. List available per-db backups
|
||||
```bash
|
||||
ls -lt /mnt/main/mysql-backup/per-db/<dbname>/
|
||||
```
|
||||
|
||||
### 2. Restore a single database
|
||||
```bash
|
||||
# Port-forward to MySQL
|
||||
kubectl port-forward svc/mysql -n dbaas 3307:3306 &
|
||||
ROOT_PWD=$(kubectl get secret cluster-secret -n dbaas -o jsonpath='{.data.ROOT_PASSWORD}' | base64 -d)
|
||||
|
||||
# Restore single database (tables present in the dump are dropped and re-created;
# other databases are untouched, and the target database must already exist)
|
||||
zcat /path/to/per-db/<dbname>/dump_YYYY_MM_DD_HH_MM.sql.gz | mysql -u root -p"$ROOT_PWD" --host 127.0.0.1 --port 3307 <dbname>
|
||||
```
|
||||
|
||||
### 3. Verify
|
||||
```bash
|
||||
mysql -u root -p"$ROOT_PWD" --host 127.0.0.1 --port 3307 -e \
|
||||
"SELECT TABLE_NAME, TABLE_ROWS FROM information_schema.TABLES WHERE TABLE_SCHEMA='<dbname>' ORDER BY TABLE_ROWS DESC LIMIT 10;"
|
||||
```
|
||||
|
||||
### 4. Restart the affected service only
|
||||
```bash
|
||||
kubectl rollout restart deployment/<deployment-name> -n <namespace>   # omitting the name would restart ALL deployments in the namespace
|
||||
```
|
||||
|
||||
**Advantages over full restore**: Only the target database is affected. All other databases continue running with their current data.
|
||||
|
||||
## Alternative: Restore from sda Backup
|
||||
|
||||
If TrueNAS NFS is unavailable but the PVE host is accessible:
|
||||
|
|
|
|||
|
|
@ -84,6 +84,43 @@ kubectl rollout restart deployment -n linkwarden
|
|||
# ... repeat for all PG-dependent services (excluding trading — disabled)
|
||||
```
|
||||
|
||||
## Restore Single Database (from per-db backup)
|
||||
|
||||
Per-database backups use `pg_dump -Fc` (custom format) and are stored at `/mnt/main/postgresql-backup/per-db/<dbname>/`.
|
||||
|
||||
### 1. List available per-db backups
|
||||
```bash
|
||||
ls -lt /mnt/main/postgresql-backup/per-db/<dbname>/
|
||||
|
||||
# Or via a pod:
|
||||
kubectl exec -n dbaas pg-cluster-1 -c postgres -- ls -lt /backup/per-db/<dbname>/ 2>/dev/null || \
|
||||
echo "Mount a backup pod — see Option A below"
|
||||
```
|
||||
|
||||
### 2. Restore a single database
|
||||
```bash
|
||||
# Port-forward to the CNPG primary
|
||||
kubectl port-forward svc/pg-cluster-rw -n dbaas 5433:5432 &
|
||||
|
||||
# Restore single database (drops and recreates objects in that DB only)
|
||||
PGPASSWORD=$(kubectl get secret pg-cluster-superuser -n dbaas -o jsonpath='{.data.password}' | base64 -d) \
|
||||
pg_restore -h 127.0.0.1 -p 5433 -U postgres -d <dbname> --clean --if-exists \
|
||||
/path/to/per-db/<dbname>/dump_YYYY_MM_DD_HH_MM.dump
|
||||
```
|
||||
|
||||
### 3. Verify
|
||||
```bash
|
||||
PGPASSWORD=$(kubectl get secret pg-cluster-superuser -n dbaas -o jsonpath='{.data.password}' | base64 -d) psql -h 127.0.0.1 -p 5433 -U postgres -d <dbname> -c \
|
||||
"SELECT schemaname, tablename, n_live_tup FROM pg_stat_user_tables ORDER BY n_live_tup DESC LIMIT 10;"
|
||||
```
|
||||
|
||||
### 4. Restart the affected service only
|
||||
```bash
|
||||
kubectl rollout restart deployment/<deployment-name> -n <namespace>   # omitting the name would restart ALL deployments in the namespace
|
||||
```
|
||||
|
||||
**Advantages over full restore**: Only the target database is affected. All other databases continue running with their current data.
|
||||
|
||||
## Alternative: Restore from sda Backup
|
||||
|
||||
If TrueNAS NFS is unavailable but the PVE host is accessible:
|
||||
|
|
|
|||
|
|
@ -516,6 +516,101 @@ resource "kubernetes_cron_job_v1" "mysql-backup" {
|
|||
}
|
||||
}
|
||||
|
||||
# Per-database MySQL backups (enables single-database restore without affecting others)
|
||||
resource "kubernetes_cron_job_v1" "mysql-backup-per-db" {
|
||||
metadata {
|
||||
name = "mysql-backup-per-db"
|
||||
namespace = kubernetes_namespace.dbaas.metadata[0].name
|
||||
}
|
||||
spec {
|
||||
concurrency_policy = "Replace"
|
||||
failed_jobs_history_limit = 3
|
||||
schedule = "45 0 * * *"
|
||||
starting_deadline_seconds = 10
|
||||
successful_jobs_history_limit = 3
|
||||
job_template {
|
||||
metadata {}
|
||||
spec {
|
||||
backoff_limit = 3
|
||||
ttl_seconds_after_finished = 10
|
||||
template {
|
||||
metadata {}
|
||||
spec {
|
||||
container {
|
||||
name = "mysql-backup-per-db"
|
||||
image = "docker.io/library/mysql:8.0"
|
||||
env {
|
||||
name = "MYSQL_PWD"
|
||||
value_from {
|
||||
secret_key_ref {
|
||||
name = "cluster-secret"
|
||||
key = "ROOT_PASSWORD"
|
||||
}
|
||||
}
|
||||
}
|
||||
command = ["/bin/bash", "-c", <<-EOT
|
||||
set -euo pipefail
|
||||
_t0=$(date +%s)
|
||||
now=$(date +"%Y_%m_%d_%H_%M")
|
||||
MYSQL_HOST=mysql.dbaas.svc.cluster.local
|
||||
failed=0
|
||||
total=0
|
||||
ok=0
|
||||
|
||||
# Discover all user databases
|
||||
dbs=$(mysql -u root --host $MYSQL_HOST -N -e \
|
||||
"SELECT schema_name FROM information_schema.schemata WHERE schema_name NOT IN ('mysql','information_schema','performance_schema','sys','mysql_innodb_cluster_metadata');")
|
||||
|
||||
for db in $dbs; do
|
||||
total=$((total + 1))
|
||||
mkdir -p /backup/per-db/$db
|
||||
echo "=== Backing up $db ==="
|
||||
if mysqldump -u root --host $MYSQL_HOST --single-transaction --set-gtid-purged=OFF "$db" | gzip -9 > "/backup/per-db/$db/dump_$now.sql.gz"; then
|
||||
_size=$(stat -c%s "/backup/per-db/$db/dump_$now.sql.gz")
|
||||
echo " OK — $(( _size / 1024 )) KiB"
|
||||
ok=$((ok + 1))
|
||||
else
|
||||
echo " FAILED"
|
||||
rm -f "/backup/per-db/$db/dump_$now.sql.gz"
|
||||
failed=$((failed + 1))
|
||||
fi
|
||||
done
|
||||
|
||||
# Rotate — 14 day retention per database
|
||||
find /backup/per-db -name "dump_*.sql.gz" -type f -mtime +14 -delete
|
||||
|
||||
_dur=$(($(date +%s) - _t0))
|
||||
echo "=== Per-DB Backup Summary ==="
|
||||
echo "databases: $total (ok: $ok, failed: $failed)"
|
||||
echo "duration: $${_dur}s"
|
||||
|
||||
curl -sf --data-binary @- "http://prometheus-prometheus-pushgateway.monitoring:9091/metrics/job/mysql-backup-per-db" <<PGEOF || true
|
||||
backup_duration_seconds $${_dur}
|
||||
backup_databases_total $total
|
||||
backup_databases_ok $ok
|
||||
backup_databases_failed $failed
|
||||
backup_last_success_timestamp $(date +%s)
|
||||
PGEOF
|
||||
EOT
|
||||
]
|
||||
volume_mount {
|
||||
name = "mysql-backup"
|
||||
mount_path = "/backup"
|
||||
}
|
||||
}
|
||||
volume {
|
||||
name = "mysql-backup"
|
||||
persistent_volume_claim {
|
||||
claim_name = module.nfs_mysql_backup_host.claim_name
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# resource "kubernetes_persistent_volume" "mysql" {
|
||||
# metadata {
|
||||
# name = "mysql-pv"
|
||||
|
|
@ -1283,3 +1378,110 @@ resource "kubernetes_cron_job_v1" "postgresql-backup" {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Per-database PostgreSQL backups (enables single-database restore without affecting others)
|
||||
# Per-database PostgreSQL backups (enables single-database restore without affecting others).
# Dumps every non-template user database with pg_dump -Fc (custom format, pg_restore-able)
# to /backup/per-db/<dbname>/, keeps 14 days per database, and pushes summary metrics.
resource "kubernetes_cron_job_v1" "postgresql-backup-per-db" {
  metadata {
    name      = "postgresql-backup-per-db"
    namespace = kubernetes_namespace.dbaas.metadata[0].name
  }
  spec {
    concurrency_policy            = "Replace"
    failed_jobs_history_limit     = 3
    schedule                      = "15 0 * * *" # 00:15 daily — offset from the MySQL per-db job (00:45)
    starting_deadline_seconds     = 10
    successful_jobs_history_limit = 3
    job_template {
      metadata {}
      spec {
        backoff_limit              = 3
        ttl_seconds_after_finished = 10
        template {
          metadata {}
          spec {
            container {
              name  = "postgresql-backup-per-db"
              image = "docker.io/library/postgres:16.4-bullseye"
              env {
                # psql/pg_dump read the superuser password from PGPASSWORD.
                name = "PGPASSWORD"
                value_from {
                  secret_key_ref {
                    name = "pg-cluster-superuser"
                    key  = "password"
                  }
                }
              }
              command = ["/bin/bash", "-c", <<-EOT
                set -euo pipefail
                # postgres image has no curl; best-effort install for the metrics push.
                apt-get update -qq && apt-get install -yqq curl >/dev/null 2>&1 || true

                _t0=$(date +%s)
                now=$(date +"%Y_%m_%d_%H_%M")
                PGHOST=pg-cluster-rw.dbaas
                PGUSER=postgres
                failed=0
                total=0
                ok=0

                # Discover all user databases (templates and the default 'postgres' DB excluded).
                dbs=$(psql -h "$PGHOST" -U "$PGUSER" -t -A -c \
                  "SELECT datname FROM pg_database WHERE datistemplate = false AND datname != 'postgres' ORDER BY datname;")

                for db in $dbs; do
                  total=$((total + 1))
                  mkdir -p "/backup/per-db/$db"
                  echo "=== Backing up $db ==="
                  if pg_dump -Fc -h "$PGHOST" -U "$PGUSER" "$db" > "/backup/per-db/$db/dump_$now.dump"; then
                    _size=$(stat -c%s "/backup/per-db/$db/dump_$now.dump")
                    echo " OK — $(( _size / 1024 )) KiB"
                    ok=$((ok + 1))
                  else
                    echo " FAILED"
                    # Remove the partial file so a broken dump is never mistaken for a backup.
                    rm -f "/backup/per-db/$db/dump_$now.dump"
                    failed=$((failed + 1))
                  fi
                done

                # Rotate — 14 day retention per database
                find /backup/per-db -name "dump_*.dump" -type f -mtime +14 -delete

                _dur=$(($(date +%s) - _t0))
                echo "=== Per-DB Backup Summary ==="
                echo "databases: $total (ok: $ok, failed: $failed)"
                echo "duration: $${_dur}s"

                {
                  echo "backup_duration_seconds $${_dur}"
                  echo "backup_databases_total $total"
                  echo "backup_databases_ok $ok"
                  echo "backup_databases_failed $failed"
                  # Only advance the success timestamp when every database dumped cleanly,
                  # so staleness alerts fire on partial failures.
                  if [ "$failed" -eq 0 ]; then
                    echo "backup_last_success_timestamp $(date +%s)"
                  fi
                } | curl -sf --data-binary @- "http://prometheus-prometheus-pushgateway.monitoring:9091/metrics/job/postgresql-backup-per-db" || true

                # Surface partial failures: non-zero exit marks the Job failed so
                # Kubernetes retries (backoff_limit) and alerting can react.
                [ "$failed" -eq 0 ]
              EOT
              ]
              volume_mount {
                name       = "postgresql-backup"
                mount_path = "/backup"
              }
              resources {
                requests = {
                  memory = "256Mi"
                  cpu    = "50m"
                }
                limits = {
                  memory = "512Mi"
                }
              }
            }
            volume {
              name = "postgresql-backup"
              persistent_volume_claim {
                claim_name = module.nfs_postgresql_backup_host.claim_name
              }
            }
          }
        }
      }
    }
  }
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue