diff --git a/.claude/reference/proxmox-inventory.md b/.claude/reference/proxmox-inventory.md
index daec0ff7..1430ae08 100644
--- a/.claude/reference/proxmox-inventory.md
+++ b/.claude/reference/proxmox-inventory.md
@@ -30,10 +30,10 @@
 | 103 | home-assistant | running | 8 | 16GB | vmbr0 | 32G | HA, net0(vlan10) disabled |
 | 105 | pbs | stopped | 16 | 8GB | vmbr1:vlan10 | 32G | Proxmox Backup (unused) |
 | 200 | k8s-master | running | 8 | 16GB | vmbr1:vlan20 | 64G | Control plane (10.0.20.100) |
-| 201 | k8s-node1 | running | 16 | 24GB | vmbr1:vlan20 | 128G | GPU node, Tesla T4 |
-| 202 | k8s-node2 | running | 8 | 16GB | vmbr1:vlan20 | 64G | Worker |
-| 203 | k8s-node3 | running | 8 | 16GB | vmbr1:vlan20 | 64G | Worker |
-| 204 | k8s-node4 | running | 8 | 16GB | vmbr1:vlan20 | 64G | Worker |
+| 201 | k8s-node1 | running | 16 | 24GB | vmbr1:vlan20 | 256G | GPU node, Tesla T4 |
+| 202 | k8s-node2 | running | 8 | 16GB | vmbr1:vlan20 | 256G | Worker |
+| 203 | k8s-node3 | running | 8 | 16GB | vmbr1:vlan20 | 256G | Worker |
+| 204 | k8s-node4 | running | 8 | 16GB | vmbr1:vlan20 | 256G | Worker |
 | 220 | docker-registry | running | 4 | 4GB | vmbr1:vlan20 | 64G | MAC DE:AD:BE:EF:22:22 (10.0.20.10) |
 | 300 | Windows10 | running | 16 | 8GB | vmbr0 | 100G | Windows VM |
 | 9000 | truenas | running | 16 | 16GB | vmbr1:vlan10 | 32G+7x256G+1T | NFS (10.0.10.15) |
diff --git a/docs/plans/2026-02-28-storage-reliability-design.md b/docs/plans/2026-02-28-storage-reliability-design.md
index bcbefef3..80889ac6 100644
--- a/docs/plans/2026-02-28-storage-reliability-design.md
+++ b/docs/plans/2026-02-28-storage-reliability-design.md
@@ -173,7 +173,6 @@ Apps confirmed safe to migrate:
 | Forgejo | `[database]` in app.ini | `forgejo dump --database postgres` | Moderate | Git repos stay on NFS |
 | FreshRSS | `DB_HOST` env vars | OPML export/import (fresh install) | Low | PG is the recommended backend |
 | Open WebUI | `DATABASE_URL` env var | None (start fresh) | Low | Chat history is disposable |
-| Vaultwarden | `DATABASE_URL` env var | pgloader (unsupported by maintainers) | **HIGH** | Test extensively; attachments stay on NFS |
 
 **Apps REMOVED from migration plan:**
 
@@ -187,6 +186,7 @@ Apps confirmed SQLite/BoltDB-only (stay on NFS):
 | App | Storage engine | Mitigation |
 |-----|---------------|------------|
 | Headscale | SQLite (recommended by project) | Accept (project-recommended config) |
+| Vaultwarden | SQLite | Defer (migration too risky for password vault) |
 | Uptime Kuma | SQLite (v2 adds MariaDB, not PG) | Accept or Litestream |
 | Navidrome | SQLite only | Accept or Litestream |
 | Audiobookshelf | SQLite only | Accept or Litestream |
@@ -286,7 +286,6 @@ Migrate one at a time, safest first:
 5a. FreshRSS (lowest risk — fresh install with OPML import)
 5b. Open WebUI (low risk — start fresh, chat history disposable)
 5c. Forgejo (moderate risk — use forgejo dump, verify git operations)
-5d. Vaultwarden (HIGH risk — pgloader, test extensively on copy first)
 
 ### Phase 6: ClickHouse + Monitoring
 6a. ClickHouse → local PVC
diff --git a/scripts/extend_vm_storage.sh b/scripts/extend_vm_storage.sh
index 4cc9b609..cf522a89 100755
--- a/scripts/extend_vm_storage.sh
+++ b/scripts/extend_vm_storage.sh
@@ -148,7 +148,7 @@ fi
 # --- Step 1: Drain node ---
 info "Step 1/7: Draining node '$NODE_NAME'..."
 DRAINED_NODE="$NODE_NAME"
-if ! $KUBECTL drain "$NODE_NAME" --ignore-daemonsets --delete-emptydir-data --timeout=120s; then
+if ! $KUBECTL drain "$NODE_NAME" --ignore-daemonsets --delete-emptydir-data --force --timeout=300s; then
     error "Failed to drain node '$NODE_NAME'."
     exit 1
 fi
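
Reviewer note: below is a minimal, hand-run sketch of the per-node sequence that extend_vm_storage.sh appears to automate (drain, grow the virtual disk, grow the guest filesystem, uncordon). It is an illustration under stated assumptions, not the script's actual implementation: the Proxmox host alias root@pve, the scsi0 disk slot, the /dev/sda1 device, and the ext4 filesystem are assumptions; only the node names, VM IDs, target size (256G), and the drain flags come from this diff.

#!/usr/bin/env bash
# Hedged sketch only: manually grow one worker's disk to 256G.
# Assumptions not taken from the repo: Proxmox host reachable as root@pve,
# the VM disk attached as scsi0, guest root on an ext4 /dev/sda1, and the
# node reachable over SSH by its Kubernetes node name.
set -euo pipefail

NODE="k8s-node2"   # node name from the inventory table
VMID=202           # matching Proxmox VM ID
GROW="+192G"       # 64G -> 256G for the plain workers; k8s-node1 would need +128G

# 1. Drain the node with the same flags the script now uses.
kubectl drain "$NODE" --ignore-daemonsets --delete-emptydir-data --force --timeout=300s

# 2. Grow the virtual disk on the Proxmox host (qm resize only ever grows a disk).
ssh root@pve "qm resize $VMID scsi0 $GROW"

# 3. Inside the guest, extend the partition and the filesystem into the new space.
ssh "$NODE" "sudo growpart /dev/sda 1 && sudo resize2fs /dev/sda1"

# 4. Return the node to service and wait for it to report Ready again.
kubectl uncordon "$NODE"
kubectl wait --for=condition=Ready "node/$NODE" --timeout=120s
ssh "$NODE" "df -h /"   # eyeball the new root filesystem size

If the guests use LVM rather than a plain partition, step 3 would go through pvresize and lvextend instead of growpart.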