add 3-2-1 backup pipeline: weekly PVC file copy, NFS mirror, pfsense, offsite sync
- weekly-backup.sh: mounts LVM thin snapshots ro, rsyncs files to /mnt/backup/pvc-data with --link-dest versioning (4 weeks). Also mirrors NFS backup dirs from TrueNAS, backs up pfsense (config.xml + full tar), PVE host config, and prunes >7d snapshots. - offsite-sync-backup.sh: rsync --files-from manifest to Synology (no full dir walk). Monthly full --delete sync on 1st Sunday. After=weekly-backup.service dependency. - lvm-pvc-snapshot.timer: changed to daily 03:00 (was 2x daily) - Prometheus alerts: WeeklyBackupStale, WeeklyBackupFailing, PfsenseBackupStale, OffsiteBackupSyncStale, BackupDiskFull. LVMSnapshotStale threshold 24h→48h.
This commit is contained in:
parent
09b4bad958
commit
d009f9a0f2
8 changed files with 422 additions and 2 deletions
79
scripts/offsite-sync-backup.sh
Normal file
79
scripts/offsite-sync-backup.sh
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
#!/usr/bin/env bash
|
||||
# offsite-sync-backup — Sync /mnt/backup to Synology NAS using changed-files manifest
|
||||
# Deploy to PVE host at /usr/local/bin/offsite-sync-backup
|
||||
# Schedule: Weekly Sunday 08:00 via systemd timer (After=weekly-backup.service)
|
||||
# Strict mode: abort on an unhandled command failure (-e), on expansion of an
# unset variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
|
||||
|
||||
# --- Configuration ---
|
||||
# Every site-specific value below keeps its previous default. BACKUP_ROOT and
# DEST can now be overridden from the environment the same way PUSHGATEWAY
# already could (useful for a dry run against a test tree or test target).

# Local backup tree that is mirrored offsite.
readonly BACKUP_ROOT="${OFFSITE_SYNC_BACKUP_ROOT:-/mnt/backup}"

# rsync-over-SSH destination, in the form user@host:/remote/path.
readonly DEST="${OFFSITE_SYNC_DEST:-Administrator@192.168.1.13:/volume1/Backup/Viki/pve-backup}"

# List of paths (relative to BACKUP_ROOT) used for the incremental transfer.
readonly MANIFEST="${BACKUP_ROOT}/.changed-files"

# Prometheus Pushgateway base URL and the job label metrics are pushed under.
readonly PUSHGATEWAY="${OFFSITE_SYNC_PUSHGATEWAY:-http://10.0.20.100:30091}"
readonly PUSHGATEWAY_JOB="offsite-backup-sync"

# Single-instance lock file; holds the PID of the running instance.
readonly LOCKFILE="/run/offsite-sync-backup.lock"
|
||||
|
||||
# --- Logging ---
|
||||
# log MESSAGE...
# Writes the arguments (joined into one string) to stdout, prefixed with a
# local "[YYYY-MM-DD HH:MM:SS]" timestamp.
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
}
|
||||
# warn MESSAGE...
# Emits MESSAGE through log with a "WARN: " tag, redirected to stderr.
warn() {
  local message="WARN: $*"
  log "${message}" >&2
}
|
||||
|
||||
# --- Locking ---
|
||||
# Single-instance guard.
#
# The lock is taken by creating LOCKFILE with noclobber set, so the redirect
# fails if the file already exists. The subshell keeps noclobber from leaking
# into the rest of the script; $$ still expands to this script's PID there.
#
# cleanup removes the lock on exit. Its trap is installed only AFTER the lock
# has been acquired: with the trap armed earlier, an instance that lost the
# race would delete the lockfile of the instance that is still running.
cleanup() { rm -f -- "${LOCKFILE}"; }

if ! ( set -o noclobber; echo "$$" > "${LOCKFILE}" ) 2>/dev/null; then
  log "FATAL: Another instance running"
  exit 1
fi

trap cleanup EXIT
|
||||
|
||||
# --- Main ---
|
||||
log "=== Offsite sync starting ==="

# Overall result of the run: 0 = success, 1 = a transfer reported an error.
STATUS=0

# Refuse to run when the backup volume is not mounted.
mountpoint -q "${BACKUP_ROOT}" || {
  log "FATAL: ${BACKUP_ROOT} is not mounted"
  exit 1
}
|
||||
|
||||
# Test SSH connectivity first
|
||||
# Pre-flight: confirm the NAS accepts a non-interactive SSH login before any
# rsync is attempted. The login target is taken from DEST (everything before
# the first ':') so the probe and the transfer always address the same host.
SSH_TARGET="${DEST%%:*}"

if ! timeout 10 ssh -o BatchMode=yes -o ConnectTimeout=5 "${SSH_TARGET}" true 2>/dev/null; then
  log "FATAL: Cannot SSH to Synology (${SSH_TARGET#*@})"
  # Best-effort alerting: a Pushgateway outage must not mask the SSH failure,
  # hence the trailing `|| true`. The success timestamp is reset to 0 (as
  # before) and the status gauge is set to 1 so it does not keep showing the
  # previous run's value.
  printf '%s\n' \
    "backup_last_success_timestamp 0" \
    "offsite_sync_last_status 1" \
    | curl -s --connect-timeout 5 --max-time 10 --data-binary @- \
      "${PUSHGATEWAY}/metrics/job/${PUSHGATEWAY_JOB}" 2>/dev/null || true
  exit 1
fi
|
||||
|
||||
# Choose the transfer mode.
#
# The script itself only checks that today is within the first 7 days of the
# month; that this is the "first Sunday" relies on the weekly Sunday schedule.
# `[ ... -le 7 ]` is kept (rather than `(( ))`) because `date +%d` yields
# zero-padded values such as "08", which arithmetic context would read as
# invalid octal.
DAY_OF_MONTH=$(date +%d)

if [ "${DAY_OF_MONTH}" -le 7 ]; then
  # Monthly: full mirror with --delete so files removed locally are also
  # removed on the Synology. Bookkeeping files are not transferred.
  log "Monthly full sync (1st Sunday)..."
  rsync -az --delete \
    --exclude='.changed-files' \
    --exclude='.last-offsite-sync' \
    --exclude='.lv-pvc-mapping.json' \
    "${BACKUP_ROOT}/" "${DEST}/" 2>&1 || STATUS=1
elif [ -s "${MANIFEST}" ]; then
  # Incremental: transfer only the paths listed in the manifest.
  # The previous `--no-traverse` flag was dropped: it is an rclone option, not
  # an rsync one, and rsync aborts with a usage error on unknown options.
  # --files-from already restricts the transfer to the listed paths.
  MANIFEST_LINES=$(wc -l < "${MANIFEST}")
  log "Incremental sync (${MANIFEST_LINES} files from manifest)..."
  rsync -az --files-from="${MANIFEST}" \
    "${BACKUP_ROOT}/" "${DEST}/" 2>&1 || STATUS=1
else
  log "No changed files in manifest, nothing to sync"
fi
|
||||
|
||||
# Post-transfer bookkeeping, dispatched on the overall result.
case "${STATUS}" in
  0)
    # Success only: record the sync time and empty the manifest.
    touch "${BACKUP_ROOT}/.last-offsite-sync"
    : > "${MANIFEST}"
    log "=== Offsite sync complete (success) ==="
    ;;
  *)
    # Failure: leave the manifest untouched so the next run retries it.
    warn "Offsite sync had errors — manifest preserved for retry"
    log "=== Offsite sync complete (with errors) ==="
    ;;
esac
|
||||
|
||||
# Report the outcome to Pushgateway (best effort: an unreachable gateway must
# not change the script's exit status, hence `|| true`).
#
# backup_last_success_timestamp is sent only when the sync succeeded.
# Previously it was set to "now" on failed runs too, which kept a staleness
# alert on that metric from ever firing. curl --data-binary issues a POST, and
# a Pushgateway POST replaces only the metric names present in the body, so a
# failed run leaves the previous success timestamp in place.
{
  if [ "${STATUS}" -eq 0 ]; then
    echo "backup_last_success_timestamp $(date +%s)"
  fi
  echo "offsite_sync_last_status ${STATUS}"
} | curl -s --connect-timeout 5 --max-time 10 --data-binary @- \
  "${PUSHGATEWAY}/metrics/job/${PUSHGATEWAY_JOB}" 2>/dev/null || true

exit "${STATUS}"
|
||||
Loading…
Add table
Add a link
Reference in a new issue