From 28ad11d12c5947374e92591f9c8b241866c6883d Mon Sep 17 00:00:00 2001
From: Viktor Barzin
Date: Mon, 13 Apr 2026 18:06:20 +0000
Subject: [PATCH] consolidate offsite backup: inotify change tracking,
 deduplicate Synology paths [ci skip]

Architecture overhaul:
- Synology truenas/ renamed to nfs/, immich paths flattened to match source
- Created nfs-ssd/ on Synology for SSD data (thumbs, ML cache)
- Deleted pve-backup/nfs-mirror (53GB duplication eliminated)
- New inotifywait daemon (nfs-change-tracker.service) watches /srv/nfs + /srv/nfs-ssd
- offsite-sync Step 2: reads inotify change log, rsync --files-from only changed files
- weekly-backup: removed NFS mirror step entirely (NFS goes direct to Synology)
- Cleaned 9 orphaned LVs (101GB + 38 snapshots reclaimed from thin pool)

Performance: incremental sync completes in seconds (vs 30+ min with full rsync)

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 scripts/nfs-change-tracker.service |  21 +++++
 scripts/offsite-sync-backup.sh     | 119 +++++++++++++++--------------
 scripts/weekly-backup.sh           |  51 +------------
 3 files changed, 86 insertions(+), 105 deletions(-)
 create mode 100644 scripts/nfs-change-tracker.service

diff --git a/scripts/nfs-change-tracker.service b/scripts/nfs-change-tracker.service
new file mode 100644
index 00000000..0d41ffb1
--- /dev/null
+++ b/scripts/nfs-change-tracker.service
@@ -0,0 +1,21 @@
+[Unit]
+Description=Track NFS filesystem changes for incremental offsite backup
+After=local-fs.target
+# The change log lives on the backup disk: never start (or keep running) while
+# it is unmounted, or events would land in a file hidden beneath the mountpoint.
+RequiresMountsFor=/mnt/backup /srv/nfs /srv/nfs-ssd
+
+[Service]
+Type=simple
+ExecStart=/usr/bin/inotifywait -m -r \
+    --format '%%w%%f' \
+    -e create -e modify -e moved_to -e delete \
+    --exclude '(/\..*swp$|/\.nfs|/\.Trash|\.db-shm$|\.db-wal$|\.db-journal$|/stats/.*\.stat$)' \
+    /srv/nfs \
+    /srv/nfs-ssd
+StandardOutput=append:/mnt/backup/.nfs-changes.log
+StandardError=journal
+Restart=always
+RestartSec=10
+
+[Install]
+WantedBy=multi-user.target
diff --git a/scripts/offsite-sync-backup.sh b/scripts/offsite-sync-backup.sh
index a55570c2..9095ad13 100644
--- a/scripts/offsite-sync-backup.sh
+++ b/scripts/offsite-sync-backup.sh
@@ -1,22 +1,25 @@
 #!/usr/bin/env bash
-# offsite-sync-backup — Sync /mnt/backup to Synology NAS using changed-files manifest
+# offsite-sync-backup — Sync backups to Synology NAS
 # Deploy to PVE host at /usr/local/bin/offsite-sync-backup
 # Schedule: Weekly Sunday 08:00 via systemd timer (After=weekly-backup.service)
+#
+# Two sync paths:
+#   Step 1: sda (/mnt/backup) → Synology pve-backup/ (PVC snapshots, pfsense, pve-config, sqlite)
+#   Step 2: NFS (/srv/nfs, /srv/nfs-ssd) → Synology nfs/, nfs-ssd/ (inotify change-tracked)
 set -euo pipefail
 
 # --- Configuration ---
 BACKUP_ROOT="/mnt/backup"
-DEST="Administrator@192.168.1.13:/volume1/Backup/Viki/pve-backup"
+SYNOLOGY="Administrator@192.168.1.13"
+PVE_BACKUP_DEST="${SYNOLOGY}:/volume1/Backup/Viki/pve-backup"
+NFS_DEST="${SYNOLOGY}:/volume1/Backup/Viki/nfs"
+NFS_SSD_DEST="${SYNOLOGY}:/volume1/Backup/Viki/nfs-ssd"
 MANIFEST="${BACKUP_ROOT}/.changed-files"
+NFS_CHANGE_LOG="${BACKUP_ROOT}/.nfs-changes.log"
 PUSHGATEWAY="${OFFSITE_SYNC_PUSHGATEWAY:-http://10.0.20.100:30091}"
 PUSHGATEWAY_JOB="offsite-backup-sync"
 LOCKFILE="/run/offsite-sync-backup.lock"
 
-# NFS media — synced directly to Synology (bypasses sda, too large to fit)
-NFS_BASE="/srv/nfs"
-NFS_SSD_BASE="/srv/nfs-ssd"
-SYNOLOGY_NFS_DEST="Administrator@192.168.1.13:/volume1/Backup/Viki/truenas"
-
 # --- Logging ---
 log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*"; }
 warn() { log "WARN: $*" >&2; }
@@ -36,9 +39,8 @@ if ! mountpoint -q "${BACKUP_ROOT}"; then
     log "FATAL: ${BACKUP_ROOT} is not mounted"; exit 1
 fi
 
-# Test SSH connectivity first
-if ! timeout 10 ssh -o BatchMode=yes -o ConnectTimeout=5 Administrator@192.168.1.13 true 2>/dev/null; then
-    log "FATAL: Cannot SSH to Synology (192.168.1.13)"
+if ! timeout 10 ssh -o BatchMode=yes -o ConnectTimeout=5 "${SYNOLOGY}" true 2>/dev/null; then
+    log "FATAL: Cannot SSH to Synology"
     echo "backup_last_success_timestamp 0" | \
         curl -s --connect-timeout 5 --max-time 10 --data-binary @- \
         "${PUSHGATEWAY}/metrics/job/${PUSHGATEWAY_JOB}" 2>/dev/null || true
@@ -47,80 +49,117 @@ fi
 
 DAY_OF_MONTH=$(date +%d)
 
+# ============================================================
+# STEP 1: sda → Synology pve-backup/ (PVC snapshots, pfsense, pve-config)
+# ============================================================
+log "--- Step 1: sda → Synology pve-backup/ ---"
+
 if [ "${DAY_OF_MONTH}" -le 7 ]; then
-    # First Sunday of month: full sync with --delete to clean orphans on Synology
     log "Monthly full sync (1st Sunday)..."
     rsync -rltz --delete --chmod=Du=rwx,Dgo=rx,Fu=rw,Fog=r \
         --exclude='.changed-files' \
         --exclude='.last-offsite-sync' \
         --exclude='.lv-pvc-mapping.json' \
-        "${BACKUP_ROOT}/" "${DEST}/" 2>&1 || STATUS=1
+        --exclude='.nfs-changes.log' \
+        "${BACKUP_ROOT}/" "${PVE_BACKUP_DEST}/" 2>&1 || STATUS=1
 elif [ -s "${MANIFEST}" ]; then
-    # Incremental: only send files listed in manifest (no remote dir walk)
     MANIFEST_LINES=$(wc -l < "${MANIFEST}")
     log "Incremental sync (${MANIFEST_LINES} files from manifest)..."
-    rsync -rltz --chmod=Du=rwx,Dgo=rx,Fu=rw,Fog=r --files-from="${MANIFEST}" --no-traverse \
-        "${BACKUP_ROOT}/" "${DEST}/" 2>&1 || STATUS=1
+    rsync -rltz --chmod=Du=rwx,Dgo=rx,Fu=rw,Fog=r --files-from="${MANIFEST}" \
+        "${BACKUP_ROOT}/" "${PVE_BACKUP_DEST}/" 2>&1 || STATUS=1
 else
     log "No changed files in manifest, nothing to sync"
 fi
 
 # ============================================================
-# STEP 2: NFS media direct to Synology (bypasses sda — too large)
-# Reuses existing TrueNAS Cloud Sync paths on Synology
+# STEP 2: NFS → Synology nfs/ + nfs-ssd/ (inotify change-tracked)
 # ============================================================
-log "--- Step 2: NFS media direct to Synology ---"
+log "--- Step 2: NFS → Synology (change-tracked) ---"
 
-# Immich (map Proxmox paths to existing Synology layout)
-for subdir in backups encoded-video library profile upload; do
-    if [ -d "${NFS_BASE}/immich/${subdir}" ]; then
-        rsync -rltz --delete \
-            "${NFS_BASE}/immich/${subdir}/" \
-            "${SYNOLOGY_NFS_DEST}/immich/immich/${subdir}/" 2>&1 \
-            && log "  OK: immich/${subdir}" \
-            || { warn "Failed: immich/${subdir}"; STATUS=1; }
-    fi
-done
-# Immich PG data + dumps
-if [ -d "${NFS_BASE}/immich/postgresql" ]; then
-    rsync -rltz --delete "${NFS_BASE}/immich/postgresql/" \
-        "${SYNOLOGY_NFS_DEST}/immich/data-immich-postgresql/" 2>&1 \
-        && log "  OK: immich/postgresql" \
-        || { warn "Failed: immich/postgresql"; STATUS=1; }
-fi
-# Immich SSD (thumbs, ML cache)
-if [ -d "${NFS_SSD_BASE}/immich/thumbs" ]; then
-    rsync -rltz --delete "${NFS_SSD_BASE}/immich/thumbs/" \
-        "${SYNOLOGY_NFS_DEST}/immich/immich/thumbs/" 2>&1 \
-        && log "  OK: immich/thumbs" \
-        || { warn "Failed: immich/thumbs"; STATUS=1; }
-fi
-if [ -d "${NFS_SSD_BASE}/immich/machine-learning" ]; then
-    rsync -rltz --delete "${NFS_SSD_BASE}/immich/machine-learning/" \
-        "${SYNOLOGY_NFS_DEST}/immich/machine-learning/" 2>&1 \
-        && log "  OK: immich/machine-learning" \
-        || { warn "Failed: immich/machine-learning"; STATUS=1; }
-fi
-# Calibre + Audiobookshelf
-for media_dir in calibre audiobookshelf; do
-    if [ -d "${NFS_BASE}/${media_dir}" ]; then
-        rsync -rltz --delete "${NFS_BASE}/${media_dir}/" \
-            "${SYNOLOGY_NFS_DEST}/${media_dir}/" 2>&1 \
-            && log "  OK: ${media_dir}" \
-            || { warn "Failed: ${media_dir}"; STATUS=1; }
-    fi
-done
+# Only the events present right now are removed after a successful sync, so
+# anything inotifywait appends while rsync is running stays queued for next run.
+NFS_STATUS=0
+NFS_EVENTS=0
+if [ -f "${NFS_CHANGE_LOG}" ]; then
+    NFS_EVENTS=$(wc -l < "${NFS_CHANGE_LOG}")
+fi
+# Private scratch dir instead of fixed, guessable /tmp names (script runs as root)
+NFS_TMP=$(mktemp -d /tmp/offsite-sync-nfs.XXXXXX)
+
+if [ "${DAY_OF_MONTH}" -le 7 ]; then
+    # Monthly: full sync with --delete for cleanup
+    log "Monthly full NFS sync..."
+    if rsync -rltz --delete /srv/nfs/ "${NFS_DEST}/" 2>&1; then
+        log "  OK: nfs/ full sync"
+    else
+        warn "nfs/ full sync failed"; NFS_STATUS=1
+    fi
+    if rsync -rltz --delete /srv/nfs-ssd/ "${NFS_SSD_DEST}/" 2>&1; then
+        log "  OK: nfs-ssd/ full sync"
+    else
+        warn "nfs-ssd/ full sync failed"; NFS_STATUS=1
+    fi
+elif [ "${NFS_EVENTS}" -gt 0 ]; then
+    # Incremental: only sync files logged by inotifywait
+    # LC_ALL=C: byte-wise dedup, so distinct names never collate as equal
+    head -n "${NFS_EVENTS}" "${NFS_CHANGE_LOG}" | LC_ALL=C sort -u > "${NFS_TMP}/changes"
+    : > "${NFS_TMP}/nfs.list"
+    : > "${NFS_TMP}/nfs-ssd.list"
+
+    # Single pass, no grep pipeline: an empty match or a vanished last file must
+    # not abort the script under `set -euo pipefail`.
+    while IFS= read -r f; do
+        [ -f "$f" ] || continue  # deleted/renamed since the event, or a directory
+        case "$f" in
+            /srv/nfs/*) printf '%s\n' "${f#/srv/nfs/}" >> "${NFS_TMP}/nfs.list" ;;
+            /srv/nfs-ssd/*) printf '%s\n' "${f#/srv/nfs-ssd/}" >> "${NFS_TMP}/nfs-ssd.list" ;;
+        esac
+    done < "${NFS_TMP}/changes"
+
+    # HDD NFS
+    NFS_COUNT=$(wc -l < "${NFS_TMP}/nfs.list")
+    if [ "${NFS_COUNT}" -gt 0 ]; then
+        if rsync -rltz --files-from="${NFS_TMP}/nfs.list" /srv/nfs/ "${NFS_DEST}/" 2>&1; then
+            log "  OK: nfs/ (${NFS_COUNT} files)"
+        else
+            warn "nfs/ incremental failed"; NFS_STATUS=1
+        fi
+    fi
+
+    # SSD NFS
+    SSD_COUNT=$(wc -l < "${NFS_TMP}/nfs-ssd.list")
+    if [ "${SSD_COUNT}" -gt 0 ]; then
+        if rsync -rltz --files-from="${NFS_TMP}/nfs-ssd.list" /srv/nfs-ssd/ "${NFS_SSD_DEST}/" 2>&1; then
+            log "  OK: nfs-ssd/ (${SSD_COUNT} files)"
+        else
+            warn "nfs-ssd/ incremental failed"; NFS_STATUS=1
+        fi
+    fi
+
+    TOTAL=$(wc -l < "${NFS_TMP}/changes")
+    log "  Processed ${TOTAL} change events (${NFS_COUNT} nfs + ${SSD_COUNT} nfs-ssd files synced)"
+else
+    log "  No NFS changes to sync"
+fi
+
+if [ "${NFS_STATUS}" -ne 0 ]; then
+    STATUS=1
+    warn "NFS sync had errors — change log preserved for retry"
+elif [ "${NFS_EVENTS}" -gt 0 ]; then
+    # Drop only the lines that were just handled; keep newer events
+    tail -n "+$((NFS_EVENTS + 1))" "${NFS_CHANGE_LOG}" > "${NFS_TMP}/rest"
+    cat "${NFS_TMP}/rest" > "${NFS_CHANGE_LOG}"
+fi
+rm -rf "${NFS_TMP}"
 
 # ============================================================
 # Finish
 # ============================================================
 if [ "${STATUS}" -eq 0 ]; then
-    # Only clear manifest + update timestamp on SUCCESS
     touch "${BACKUP_ROOT}/.last-offsite-sync"
     > "${MANIFEST}"
     log "=== Offsite sync complete (success) ==="
 else
-    # Keep manifest for retry next week
     warn "Offsite sync had errors — manifest preserved for retry"
     log "=== Offsite sync complete (with errors) ==="
 fi
diff --git a/scripts/weekly-backup.sh b/scripts/weekly-backup.sh
index 5ba63581..09580a09 100644
--- a/scripts/weekly-backup.sh
+++ b/scripts/weekly-backup.sh
@@ -6,9 +6,6 @@ set -euo pipefail
 
 # --- Configuration ---
 BACKUP_ROOT="/mnt/backup"
-NFS_SERVER="192.168.1.127"
-NFS_BASE="/srv/nfs"
-NFS_MOUNT="/mnt/nfs-proxmox"
 PVC_MOUNT="/tmp/pvc-mount"
 PUSHGATEWAY="${WEEKLY_BACKUP_PUSHGATEWAY:-http://10.0.20.100:30091}"
 PUSHGATEWAY_JOB="weekly-backup"
@@ -18,9 +15,6 @@ MAPPING_CACHE="${BACKUP_ROOT}/.lv-pvc-mapping.json"
 KUBECONFIG="${KUBECONFIG:-/root/.kube/config}"
 export KUBECONFIG
 
-# NFS backup directories — auto-discovered after NFS mount (all *-backup dirs)
-BACKUP_DIRS=()
-
 # --- Logging ---
 log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*"; }
 warn() { log "WARN: $*" >&2; }
@@ -29,7 +23,6 @@ die() { log "FATAL: $*" >&2; push_metrics 1 0; exit 1; }
 # --- Locking ---
 cleanup() {
     umount "${PVC_MOUNT}" 2>/dev/null || true
-    umount "${NFS_MOUNT}" 2>/dev/null || true
     rm -f "${LOCKFILE}"
 }
 trap cleanup EXIT
@@ -70,49 +63,11 @@ TOTAL_BYTES=0
 # Clear manifest for this run
 > "${MANIFEST}"
 
-# ============================================================
-# STEP 1: Mirror NFS backup directories from TrueNAS
-# ============================================================
-log "--- Step 1: NFS backup mirror ---"
-mkdir -p "${NFS_MOUNT}"
-if ! mountpoint -q "${NFS_MOUNT}"; then
-    if ! timeout 30 mount -t nfs -o soft,timeo=30,retrans=3,ro "${NFS_SERVER}:${NFS_BASE}" "${NFS_MOUNT}"; then
-        warn "Failed to mount NFS — skipping NFS mirror step"
-        STATUS=1
-    fi
-fi
-
-if mountpoint -q "${NFS_MOUNT}"; then
-    # Auto-discover all *-backup directories (no hardcoded list)
-    for d in "${NFS_MOUNT}"/*-backup/; do
-        [ -d "$d" ] && BACKUP_DIRS+=("$(basename "$d")")
-    done
-    log "  Discovered ${#BACKUP_DIRS[@]} backup dirs: ${BACKUP_DIRS[*]}"
-
-    mkdir -p "${BACKUP_ROOT}/nfs-mirror"
-    for dir in "${BACKUP_DIRS[@]}"; do
-        src="${NFS_MOUNT}/${dir}/"
-        dst="${BACKUP_ROOT}/nfs-mirror/${dir}/"
-        mkdir -p "${dst}"
-        if [ ! -d "${src}" ]; then
-            continue
-        fi
-        log "Syncing ${dir}..."
-        if rsync -az --delete --out-format='%n' "${src}" "${dst}" 2>/dev/null | \
-            sed "s|^|nfs-mirror/${dir}/|" >> "${MANIFEST}"; then
-            size=$(du -sb "${dst}" 2>/dev/null | cut -f1)
-            TOTAL_BYTES=$((TOTAL_BYTES + size))
-            log "  OK: ${dir} ($(du -sh "${dst}" | cut -f1))"
-        else
-            warn "Failed to sync ${dir}"
-            STATUS=1
-        fi
-    done
-    umount "${NFS_MOUNT}" 2>/dev/null || true
-fi
+# NFS data is synced directly to Synology via inotifywait + offsite-sync-backup.sh
+# No NFS mirror step on sda — saves 53GB and eliminates duplication.
 
 # ============================================================
-# STEP 2: PVC file-level copy from LVM thin snapshots
+# STEP 1: PVC file-level copy from LVM thin snapshots
 # ============================================================
-log "--- Step 2: PVC file copy from snapshots ---"
+log "--- Step 1: PVC file copy from snapshots ---"
 WEEK=$(date +%Y-%W)