diff --git a/.claude/reference/proxmox-inventory.md b/.claude/reference/proxmox-inventory.md index 60dfab0b..1d1ab9bb 100644 --- a/.claude/reference/proxmox-inventory.md +++ b/.claude/reference/proxmox-inventory.md @@ -122,9 +122,8 @@ Channel 3: A4 [32G] ──── A8 [32G] ──── A12[ 8G ] = 72 GB | `offsite-sync-backup.timer` | Timer | Daily 06:00 | Two-step rsync to Synology (sda + NFS via inotify) | | `nfs-change-tracker.service` | Service | Continuous | inotifywait on `/srv/nfs` + `/srv/nfs-ssd`, logs to `/mnt/backup/.nfs-changes.log` | -## GPU Node (currently k8s-node1) -- **VMID**: 201, **PCIe**: `0000:06:00.0` (NVIDIA Tesla T4) — physical passthrough, no Terraform pin -- **Taint**: `nvidia.com/gpu=true:PreferNoSchedule` (applied dynamically to every NFD-discovered GPU node) -- **Label**: `nvidia.com/gpu.present=true` (auto-applied by gpu-feature-discovery; also `feature.node.kubernetes.io/pci-10de.present=true` from NFD) -- GPU workloads need: `node_selector = { "nvidia.com/gpu.present" : "true" }` + nvidia toleration -- Taint applied via `null_resource.gpu_node_config` in `stacks/nvidia/modules/nvidia/main.tf`; node discovery keyed on the NFD `pci-10de.present` label so the taint follows the card to whichever host is carrying it +## GPU Node (k8s-node1) +- **VMID**: 201, **PCIe**: `0000:06:00.0` (NVIDIA Tesla T4) +- **Taint**: `nvidia.com/gpu=true:NoSchedule`, **Label**: `gpu=true` +- GPU workloads need: `node_selector = { "gpu": "true" }` + nvidia toleration +- Taint applied via `null_resource.gpu_node_taint` in `modules/kubernetes/nvidia/main.tf` diff --git a/.woodpecker/default.yml b/.woodpecker/default.yml index fa6ffc4a..9e0d1fe5 100644 --- a/.woodpecker/default.yml +++ b/.woodpecker/default.yml @@ -128,7 +128,7 @@ steps: # ── Pre-warm provider cache ── - | if [ -s .platform_apply ] || [ -s .app_apply ]; then - FIRST_STACK=$(cat .platform_apply .app_apply 2>/dev/null | head -1) + FIRST_STACK=$(cat .platform_apply .app_apply 2>/dev/null | head -n 1) if [ -n
"$FIRST_STACK" ]; then echo "Pre-warming provider cache from stacks/$FIRST_STACK..." cd "stacks/$FIRST_STACK" && terragrunt init --terragrunt-non-interactive -input=false 2>&1 | tail -3 && cd ../.. @@ -150,7 +150,7 @@ steps: if echo "$OUTPUT" | grep -q "is locked by"; then echo "[$stack] SKIPPED (locked by another session)" else - echo "$OUTPUT" | tail -50 + echo "$OUTPUT" | tail -5 echo "[$stack] FAILED (exit $EXIT)" FAILED_PLATFORM_STACKS="$FAILED_PLATFORM_STACKS $stack" fi @@ -178,7 +178,7 @@ steps: if echo "$OUTPUT" | grep -q "is locked by"; then echo "[$stack] SKIPPED (locked by another session)" else - echo "$OUTPUT" | tail -50 + echo "$OUTPUT" | tail -5 echo "[$stack] FAILED (exit $EXIT)" FAILED_APP_STACKS="$FAILED_APP_STACKS $stack" fi diff --git a/AGENTS.md b/AGENTS.md index 5f9c0839..0f1794f1 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -163,10 +163,10 @@ lifecycle { ## Infrastructure - **Proxmox**: 192.168.1.127 (Dell R730, 22c/44t, 142GB RAM) - **Nodes**: k8s-master (10.0.20.100), node1 (GPU, Tesla T4), node2-4 -- **GPU**: `node_selector = { "nvidia.com/gpu.present" : "true" }` + toleration `nvidia.com/gpu`. The label is auto-applied by NFD/gpu-feature-discovery on any node with an NVIDIA PCI device — nothing is hostname-pinned, so the GPU card can move between nodes without Terraform edits. +- **GPU**: `node_selector = { "gpu": "true" }` + toleration `nvidia.com/gpu` - **Pull-through cache**: 10.0.20.10 — docker.io (:5000), ghcr.io (:5010) only. Caches stale manifests for :latest tags — use versioned tags or pre-pull with `ctr --hosts-dir ''` to bypass. 
- **pfSense**: 10.0.20.1 (gateway, firewall, DNS forwarding) -- **MySQL InnoDB Cluster**: 1 instance on proxmox-lvm (scaled from 3 — only Uptime Kuma + phpIPAM remain), PriorityClass `mysql-critical` + PDB, anti-affinity excludes any GPU node (`nvidia.com/gpu.present=true`) so MySQL moves off the GPU host automatically if the card is relocated +- **MySQL InnoDB Cluster**: 1 instance on proxmox-lvm (scaled from 3 — only Uptime Kuma + phpIPAM remain), PriorityClass `mysql-critical` + PDB, anti-affinity excludes k8s-node1 (GPU node) - **SMTP**: `var.mail_host` port 587 STARTTLS (not internal svc address — cert mismatch) ## Contributor Onboarding diff --git a/ci/Dockerfile b/ci/Dockerfile index 2a02b586..ea534d6e 100644 --- a/ci/Dockerfile +++ b/ci/Dockerfile @@ -1,11 +1,12 @@ FROM alpine:3.20 +# Rebuild 2026-04-19 — previous :latest index referenced missing blobs (404 on 98f718c8 / 27d5ab83) + # Pin versions to match CI requirements ARG TERRAFORM_VERSION=1.5.7 ARG TERRAGRUNT_VERSION=0.99.4 ARG SOPS_VERSION=3.9.4 ARG KUBECTL_VERSION=1.34.0 -ARG VAULT_VERSION=1.18.1 # Install system packages (single layer) RUN apk add --no-cache \ @@ -35,16 +36,6 @@ RUN curl -fsSL "https://dl.k8s.io/release/v${KUBECTL_VERSION}/bin/linux/amd64/ku -o /usr/local/bin/kubectl \ && chmod +x /usr/local/bin/kubectl -# Vault CLI — required by scripts/tg for Tier 1 stack PG credential reads -# and Tier 0 advisory locks. Pinned to server version (1.18.1). Without this -# the CI pipeline surfaces the misleading "Cannot read PG credentials" error -# because scripts/tg swallows stderr ("vault: not found"). 
-RUN curl -fsSL "https://releases.hashicorp.com/vault/${VAULT_VERSION}/vault_${VAULT_VERSION}_linux_amd64.zip" \ - -o /tmp/vault.zip \ - && unzip /tmp/vault.zip -d /usr/local/bin/ \ - && rm /tmp/vault.zip \ - && vault version - # Provider cache directory (shared across stacks) ENV TF_PLUGIN_CACHE_DIR=/tmp/terraform-plugin-cache ENV TF_PLUGIN_CACHE_MAY_BREAK_DEPENDENCY_LOCK_FILE=1 diff --git a/config.tfvars b/config.tfvars index 790a48ae..6db48575 100644 Binary files a/config.tfvars and b/config.tfvars differ diff --git a/docs/architecture/backup-dr.md b/docs/architecture/backup-dr.md index b307ec6c..2c992c20 100644 --- a/docs/architecture/backup-dr.md +++ b/docs/architecture/backup-dr.md @@ -217,7 +217,7 @@ graph LR Native LVM thin snapshots provide crash-consistent point-in-time recovery for 62 Proxmox CSI PVCs. These are CoW snapshots — instant creation, minimal overhead, sharing the thin pool's free space. -**Script**: `/usr/local/bin/lvm-pvc-snapshot` on PVE host (source: `infra/scripts/lvm-pvc-snapshot.sh`). Deploy: `scp infra/scripts/lvm-pvc-snapshot.sh root@192.168.1.127:/usr/local/bin/lvm-pvc-snapshot` +**Script**: `/usr/local/bin/lvm-pvc-snapshot` on PVE host (source: `infra/scripts/lvm-pvc-snapshot`) **Schedule**: Daily 03:00 via systemd timer, 7-day retention **Discovery**: Auto-discovers PVC LVs matching `vm-*-pvc-*` pattern in VG `pve` thin pool `data` @@ -226,7 +226,7 @@ Native LVM thin snapshots provide crash-consistent point-in-time recovery for 62 - They already have app-level dumps (Layer 2) - Including them causes ~36% write amplification; excluding them reduces overhead to ~0% -**Monitoring**: Pushes metrics to Pushgateway via NodePort (30091). Alerts: `LVMSnapshotStale` (>30h since last run + 30m `for:`), `LVMSnapshotFailing`, `LVMThinPoolLow` (<15% free). +**Monitoring**: Pushes metrics to Pushgateway via NodePort (30091). Alerts: `LVMSnapshotStale` (>24h), `LVMSnapshotFailing`, `LVMThinPoolLow` (<15% free). 
**Restore**: `lvm-pvc-snapshot restore ` — auto-discovers K8s workload, scales down, swaps LVs, scales back up. See `docs/runbooks/restore-lvm-snapshot.md`. @@ -234,7 +234,7 @@ Native LVM thin snapshots provide crash-consistent point-in-time recovery for 62 **Backup disk**: sda (1.1TB RAID1 SAS) → VG `backup` → LV `data` → ext4 → mounted at `/mnt/backup` on PVE host. Dedicated backup disk, independent of live storage. -**Script**: `/usr/local/bin/daily-backup` on PVE host (source: `infra/scripts/daily-backup.sh`) +**Script**: `/usr/local/bin/daily-backup` on PVE host (source: `infra/scripts/daily-backup`) **Schedule**: Daily 05:00 via systemd timer **Retention**: 4 weekly versions (weeks 0-3 via `--link-dest` hardlink dedup) @@ -673,7 +673,7 @@ module "nfs_backup" { │ ~~CloudSyncNeverRun~~ REMOVED (TrueNAS decommissioned) │ │ ~~CloudSyncFailing~~ REMOVED (TrueNAS decommissioned) │ │ VaultwardenIntegrityFail integrity_ok == 0 │ -│ LVMSnapshotStale > 30h since last snapshot │ +│ LVMSnapshotStale > 24h since last snapshot │ │ LVMSnapshotFailing snapshot creation failed │ │ LVMThinPoolLow < 15% free space in thin pool │ │ WeeklyBackupStale > 8d since last success │ @@ -692,16 +692,6 @@ module "nfs_backup" { - ~~CloudSync monitor~~: Removed (TrueNAS decommissioned) - Vaultwarden integrity: Pushes `vaultwarden_sqlite_integrity_ok` hourly -**Pushgateway persistence**: The Pushgateway is configured with -`--persistence.file=/data/pushgateway.bin --persistence.interval=1m` -on a 2Gi `proxmox-lvm-encrypted` PVC (helm values: -`prometheus-pushgateway.persistentVolume`). Without this, every pod -restart drops in-memory metrics. Once-per-day pushers (offsite-sync, -weekly backup) are otherwise invisible for up to 24h if the -Pushgateway restarts between pushes — which is exactly what triggered -the 2026-04-22 backup_offsite_sync FAIL (node3 kubelet hiccup at -11:42 UTC terminated the Pushgateway 8h after the 03:12 UTC push). 
- **Alert routing**: - All backup alerts → Slack `#infra-alerts` - Vaultwarden integrity fail → Slack `#infra-critical` (immediate action required) diff --git a/docs/architecture/compute.md b/docs/architecture/compute.md index cc9c4786..bf456030 100644 --- a/docs/architecture/compute.md +++ b/docs/architecture/compute.md @@ -18,7 +18,7 @@ graph TB subgraph Proxmox["Proxmox VE"] direction TB MASTER["VM 200: k8s-master
8c / 32GB
10.0.20.100"] - NODE1["VM 201: k8s-node1
16c / 32GB
GPU Passthrough
nvidia.com/gpu=true:PreferNoSchedule"] + NODE1["VM 201: k8s-node1
16c / 32GB
GPU Passthrough
nvidia.com/gpu=true:NoSchedule"] NODE2["VM 202: k8s-node2
8c / 32GB"] NODE3["VM 203: k8s-node3
8c / 32GB"] NODE4["VM 204: k8s-node4
8c / 32GB"] @@ -72,7 +72,7 @@ graph TB | VM | VMID | vCPUs | RAM | Network | Role | Taints | |----|------|-------|-----|---------|------|--------| | k8s-master | 200 | 8 | 32GB | vmbr1:vlan20 (10.0.20.100) | Control Plane | `node-role.kubernetes.io/control-plane:NoSchedule` | -| k8s-node1 | 201 | 16 | 32GB | vmbr1:vlan20 | GPU Worker | `nvidia.com/gpu=true:PreferNoSchedule` (applied dynamically to whichever node carries the GPU) | +| k8s-node1 | 201 | 16 | 32GB | vmbr1:vlan20 | GPU Worker | `nvidia.com/gpu=true:NoSchedule` | | k8s-node2 | 202 | 8 | 32GB | vmbr1:vlan20 | Worker | None | | k8s-node3 | 203 | 8 | 32GB | vmbr1:vlan20 | Worker | None | | k8s-node4 | 204 | 8 | 32GB | vmbr1:vlan20 | Worker | None | @@ -85,9 +85,9 @@ graph TB |-----------|-------| | Device | NVIDIA Tesla T4 (16GB GDDR6) | | PCIe Address | 0000:06:00.0 | -| Assigned VM | VMID 201 (k8s-node1) — physical location only, no Terraform pin | -| Node Label | `nvidia.com/gpu.present=true` (auto-applied by gpu-feature-discovery; also `feature.node.kubernetes.io/pci-10de.present=true` from NFD) | -| Node Taint | `nvidia.com/gpu=true:PreferNoSchedule` (applied by `null_resource.gpu_node_config` to every NFD-tagged GPU node) | +| Assigned VM | VMID 201 (k8s-node1) | +| Node Label | `gpu=true` | +| Node Taint | `nvidia.com/gpu=true:NoSchedule` | | Driver | NVIDIA GPU Operator | | Resource Name | `nvidia.com/gpu` | @@ -273,8 +273,8 @@ resources { ### GPU Resource Management **Node Selection**: GPU pods must: -1. Tolerate `nvidia.com/gpu=true:PreferNoSchedule` taint -2. Select `nvidia.com/gpu.present=true` label (auto-applied by gpu-feature-discovery wherever the card is) +1. Tolerate `nvidia.com/gpu=true:NoSchedule` taint +2. Select `gpu=true` label 3. 
Request `nvidia.com/gpu: 1` resource **Example**: @@ -286,7 +286,7 @@ spec: value: "true" effect: NoSchedule nodeSelector: - nvidia.com/gpu.present: "true" + gpu: "true" containers: - name: app resources: @@ -294,14 +294,6 @@ spec: nvidia.com/gpu: 1 ``` -**Portability**: No Terraform code references a specific hostname for -GPU scheduling. If the GPU card is physically moved to a different -node, gpu-feature-discovery moves the `nvidia.com/gpu.present=true` -label with it, and `null_resource.gpu_node_config` re-applies the -`nvidia.com/gpu=true:PreferNoSchedule` taint to the new host on the -next apply (discovery keyed on -`feature.node.kubernetes.io/pci-10de.present=true`). - **GPU Workloads**: - Ollama (LLM inference) - ComfyUI (Stable Diffusion workflows) @@ -537,7 +529,7 @@ kubectl describe pod -n ``` 0/5 nodes are available: 5 Insufficient nvidia.com/gpu. ``` - **Fix**: Verify the GPU-carrying node is Ready and has the `nvidia.com/gpu.present=true` label. Check `kubectl get nodes -l nvidia.com/gpu.present=true` — if empty, gpu-feature-discovery hasn't labeled any node (operator not running, driver not loaded, or PCI passthrough broken). + **Fix**: Verify GPU node (201) is Ready and labeled `gpu=true`. ### Pods OOMKilled repeatedly @@ -622,7 +614,7 @@ spec: value: "true" effect: NoSchedule nodeSelector: - nvidia.com/gpu.present: "true" + gpu: "true" containers: - name: app resources: diff --git a/docs/architecture/databases.md b/docs/architecture/databases.md index c47fcb3d..810fe85c 100644 --- a/docs/architecture/databases.md +++ b/docs/architecture/databases.md @@ -127,13 +127,9 @@ Single shared cluster for all 17 consumers (Immich, Authentik, Nextcloud, Paperl 3 pods in StatefulSet `redis-v2`, each co-locating redis + sentinel + redis_exporter, using `docker.io/library/redis:8-alpine` (8.6.2). HAProxy (3 replicas, PDB minAvailable=2) routes clients to the current master via 1s `INFO replication` tcp-checks. 
Full context behind the April 2026 rework in beads `code-v2b`. - 3 redis pods + 3 co-located sentinels (quorum=2). Odd sentinel count eliminates split-brain. -- **Pod anti-affinity is `required` (hard)** — each redis pod must land on a distinct node. Soft anti-affinity previously let the scheduler co-locate 2/3 pods on the same node; when that node (`k8s-node3`) went `NotReady→Ready` at 11:42 UTC on 2026-04-22 it took 2 redis pods with it and the cluster lost quorum. Cluster-wide PV `nodeAffinity` matches one zone (`topology.kubernetes.io/region=pve, zone=pve`), so PVCs rebind freely on reschedule. - `podManagementPolicy=Parallel` + init container that regenerates `sentinel.conf` on every boot by probing peer sentinels for consensus master (priority: sentinel vote → peer role:master with slaves → deterministic pod-0 fallback). No persistent sentinel runtime state — can't drift out of sync with reality (root cause of 2026-04-19 PM incident). - redis.conf has `include /shared/replica.conf`; the init container writes either an empty file (master) or `replicaof <master-host> 6379` (replicas), so pods come up already in the right role — no bootstrap race. - **Sentinel hostname persistence**: `sentinel resolve-hostnames yes` + `sentinel announce-hostnames yes` in the init-generated sentinel.conf are mandatory — without them, sentinel stores resolved IPs in its rewritten config, and pod-IP churn on restart breaks failover. The MONITOR command itself must be issued with a hostname and the flags must be active before MONITOR, otherwise sentinel stores an IP that goes stale the next time the pod is deleted. -- **Failover timing (tuned 2026-04-22)**: `sentinel down-after-milliseconds=15000` + `sentinel failover-timeout=60000`. Redis liveness probe `timeout_seconds=10, failure_threshold=5`; sentinel liveness probe same.
LUKS-encrypted LVM + BGSAVE fork can briefly stall master I/O >5s, which under the old 5s/30s sentinel timings + 3s/3 probes induced spurious `+sdown`→`+odown`→`+switch-master` cycles every 1-2 minutes. The new values absorb normal BGSAVE pauses without triggering failover. -- **HAProxy check smoothing (tuned 2026-04-22)**: `check inter 2s fall 3 rise 2` (was `1s / 2 / 2`) + `timeout check 5s` (was `3s`). The aggressive 1s polling used to race sentinel failovers — during a legitimate promote, HAProxy could catch the old master serving `role:slave` in the 1-3s window before re-probing the new master, leaving the backend empty and clients receiving `ReadOnlyError`. -- **Headless service `publish_not_ready_addresses=false`** (flipped 2026-04-22). Previously `true` meant HAProxy's DNS resolver saw not-yet-ready pods during rollouts, compounding the check-race above. Sentinel peer discovery is unaffected because sentinels announce to each other explicitly via `sentinel announce-hostnames yes`. - Memory: master + replicas `requests=limits=768Mi`. Concurrent BGSAVE + AOF-rewrite fork can double RSS via COW, so headroom must cover it. `auto-aof-rewrite-percentage=200` + `auto-aof-rewrite-min-size=128mb` tune down rewrite frequency. - Persistence: RDB (`save 900 1 / 300 100 / 60 10000`) + AOF `appendfsync=everysec`. Disk-wear analysis on 2026-04-19 (sdb Samsung 850 EVO 1TB, 150 TBW): Redis contributes <1 GB/day cluster-wide → 40+ year runway at the 20% TBW budget. - `maxmemory=640mb` (83% of 768Mi limit), `maxmemory-policy=allkeys-lru`. @@ -142,7 +138,7 @@ Single shared cluster for all 17 consumers (Immich, Authentik, Nextcloud, Paperl **Observability** (redis-v2 only): `oliver006/redis_exporter:v1.62.0` sidecar per pod on port 9121, auto-scraped via Prometheus pod annotation. 
Alerts: `RedisDown`, `RedisMemoryPressure`, `RedisEvictions`, `RedisReplicationLagHigh`, `RedisForkLatencyHigh`, `RedisAOFRewriteLong`, `RedisReplicasMissing`, `RedisBackupStale`, `RedisBackupNeverSucceeded`. -**Why this design** — four incidents in April 2026 drove the rework: (a) 2026-04-04 service selector routed reads+writes to master+replica causing `READONLY` errors; (b) 2026-04-19 AM master OOMKilled during BGSAVE+PSYNC with the 256Mi limit too tight for a 204 MB working set under COW amplification; (c) 2026-04-19 PM sentinel runtime state drifted (only 2 sentinels, no majority) and routed writes to a slave; (d) 2026-04-22 five-factor flap cascade — soft anti-affinity let 2/3 pods co-locate on `k8s-node3`, node bounced NotReady→Ready and took quorum with it; aggressive sentinel/probe timing (5s/30s + 3s/3) amplified disk-I/O stalls under LUKS-encrypted LVM into spurious `+switch-master` loops; HAProxy's 1s polling raced sentinel failovers and routed writes to demoted masters; `publish_not_ready_addresses=true` fed not-yet-ready pods into HAProxy DNS; downstream `realestate-crawler-celery` CrashLoopBackOff closed the feedback loop. See beads epic `code-v2b` for the full plan and linked challenger analyses. +**Why this design** — three incidents in April 2026 drove the rework: (a) 2026-04-04 service selector routed reads+writes to master+replica causing `READONLY` errors; (b) 2026-04-19 AM master OOMKilled during BGSAVE+PSYNC with the 256Mi limit too tight for a 204 MB working set under COW amplification; (c) 2026-04-19 PM sentinel runtime state drifted (only 2 sentinels, no majority) and routed writes to a slave. See beads epic `code-v2b` for the full plan and linked challenger analyses. 
### SQLite (Per-App) diff --git a/docs/architecture/mailserver.md b/docs/architecture/mailserver.md index 0026b932..21b2f957 100644 --- a/docs/architecture/mailserver.md +++ b/docs/architecture/mailserver.md @@ -231,7 +231,7 @@ Push secrets (`BREVO_API_KEY`, `EMAIL_MONITOR_IMAP_PASSWORD`) come from External |-------|-----------|----------| | MailServerDown | No replicas for 5m | warning | | EmailRoundtripFailing | Probe failing for 30m | warning | -| EmailRoundtripStale | No success in >80m (60m threshold + for:20m) | warning | +| EmailRoundtripStale | No success in >40m | warning | | EmailRoundtripNeverRun | Metric absent for 40m | warning | ### Uptime Kuma Monitors diff --git a/docs/architecture/monitoring.md b/docs/architecture/monitoring.md index 5fa3bbba..0de2a219 100644 --- a/docs/architecture/monitoring.md +++ b/docs/architecture/monitoring.md @@ -158,7 +158,7 @@ spec: #### Email Monitoring Alerts - **EmailRoundtripFailing**: E2E email probe returning failure for >30m -- **EmailRoundtripStale**: No successful email round-trip in >80m (60m threshold + for:20m) +- **EmailRoundtripStale**: No successful email round-trip in >40m - **EmailRoundtripNeverRun**: Email probe has never reported (40m) #### Registry Integrity Alerts diff --git a/docs/architecture/overview.md b/docs/architecture/overview.md index cb0f8e6d..9e0fe7be 100644 --- a/docs/architecture/overview.md +++ b/docs/architecture/overview.md @@ -139,7 +139,7 @@ The Kubernetes cluster consists of 5 nodes: - **k8s-node1 (201)**: 16c/32GB GPU node with Tesla T4 passthrough, tainted for GPU workloads only - **k8s-node2-4 (202-204)**: 8c/32GB workers running general-purpose workloads -GPU passthrough on node1 uses PCIe device 0000:06:00.0. The NVIDIA GPU Operator's gpu-feature-discovery auto-labels whichever node carries the card with `nvidia.com/gpu.present=true`; `null_resource.gpu_node_config` taints the same set of nodes with `nvidia.com/gpu=true:PreferNoSchedule`. 
No hostname is hardcoded — moving the card to a different node requires no Terraform edits. +GPU passthrough on node1 uses PCIe device 0000:06:00.0, with Kubernetes taint `nvidia.com/gpu=true:NoSchedule` and label `gpu=true` to ensure only GPU-requesting pods schedule there. ### Service Organization diff --git a/docs/architecture/storage.md b/docs/architecture/storage.md index df1e89f9..69b32a1a 100644 --- a/docs/architecture/storage.md +++ b/docs/architecture/storage.md @@ -129,9 +129,7 @@ graph TB 5. **Passphrase management**: ExternalSecret syncs passphrase from Vault KV (`secret/viktor/proxmox_csi_encryption_passphrase`) → K8s Secret. Backup key at `/root/.luks-backup-key` on PVE host. **Services on encrypted storage (2026-04-15 migration):** -vaultwarden, dbaas (mysql+pg+pgadmin), mailserver, nextcloud, forgejo, matrix, n8n, affine, health, hackmd, redis, headscale, frigate, meshcentral, technitium, actualbudget, grampsweb, owntracks, wealthfolio, monitoring (alertmanager) - -**Services migrated later** (post-audit catch-up): paperless-ngx (2026-04-25 — sensitive document scans had been left on plain `proxmox-lvm` by an abandoned attempt; rsync swap cleaned up the orphan and re-did via Terraform). Vault raft cluster (2026-04-25 — all 3 voters migrated from `nfs-proxmox` to `proxmox-lvm-encrypted` after the 2026-04-22 raft-leader-deadlock post-mortem found NFS fsync semantics incompatible with raft consensus log; rolled non-leader-first with force-finalize on the pvc-protection finalizer to avoid pod-recreating on the old PVCs). +vaultwarden, dbaas (mysql+pg+pgadmin), mailserver, nextcloud, forgejo, matrix, n8n, affine, health, hackmd, redis, headscale, frigate, meshcentral, technitium, actualbudget, grampsweb, owntracks, paperless-ngx, wealthfolio, monitoring (alertmanager) **CSI node plugin memory**: Requires 1280Mi limit for LUKS2 Argon2id key derivation (~1GiB). Set via `node.plugin.resources` in Helm values (not `node.resources`). 
diff --git a/docs/plans/2026-04-25-nfs-hostile-migration-design.md b/docs/plans/2026-04-25-nfs-hostile-migration-design.md deleted file mode 100644 index 832064ea..00000000 --- a/docs/plans/2026-04-25-nfs-hostile-migration-design.md +++ /dev/null @@ -1,142 +0,0 @@ -# NFS-Hostile Workload Migration — Design - -**Date**: 2026-04-25 -**Author**: Viktor (with Claude) -**Status**: Phase 1 done, Phase 2 in progress -**Beads**: code-gy7h (Vault), code-ahr7 (Immich PG) - -## Problem - -The 2026-04-22 Vault Raft leader deadlock (post-mortem -`2026-04-22-vault-raft-leader-deadlock.md`) traced to NFS client -writeback stalls poisoning kernel state. Recovery took 2h43m and -required hard-resetting 3 of 4 cluster VMs. Two workload classes on -NFS are NFS-hostile per the criteria in -`infra/.claude/CLAUDE.md` ("Critical services MUST NOT use NFS"): - -1. **Postgres with WAL fsync per commit** — Immich primary -2. **Vault Raft consensus log** — fsync per append-entry, 3 replicas - -Everything else on NFS (47 PVCs, ~455 GiB) is correctly placed: -RWX media libraries, append-only backups, ML caches. - -## Decision - -Migrate exactly those two workload classes to -`proxmox-lvm-encrypted` (LUKS2 LVM-thin via Proxmox CSI). No iSCSI, -no RWX media migration, no backup-target migration. - -## Rationale - -- Block storage decouples PG / Raft fsync from NFS client kernel - state. Failure mode that triggered the post-mortem cannot recur for - these workloads. -- `proxmox-lvm-encrypted` is the documented default for sensitive data - (`infra/.claude/CLAUDE.md` storage decision rule). It already backs - ~28 PVCs across the cluster — pattern is proven. -- Existing nightly `lvm-pvc-snapshot` PVE host script (03:00, 7-day - retention) auto-picks-up new PVCs via thin snapshots — no extra - backup wiring needed for the live data side. -- LUKS2 satisfies "encrypted at rest for sensitive data" requirement. - -## Out of scope - -- iSCSI evaluation (already retired 2026-04-13). 
-- RWX media (Immich library, music, ebooks) — correct placement. -- Backup target PVCs (`*-backup` on NFS) — append-only, NFS-tolerant. -- Prometheus 200 GiB — already on `proxmox-lvm`. - -## Pattern per workload - -### Immich PG (single replica, Deployment, Recreate strategy) - -- Add new RWO PVC on `proxmox-lvm-encrypted`. -- Quiesce app pods (server + ML + frame). -- `pg_dumpall` from running NFS pod → local file. -- Swap deployment `claim_name` → encrypted PVC. -- PG bootstraps fresh on empty PVC; restore dump. -- REINDEX vector indexes (`clip_index`, `face_index`). -- Backup CronJob keeps writing to NFS module (correct: append-only). - -### Vault Raft (3 replicas, StatefulSet, helm-managed) - -- Change `dataStorage.storageClass` and `auditStorage.storageClass` - from `nfs-proxmox` → `proxmox-lvm-encrypted`. -- StatefulSet `volumeClaimTemplates` is immutable → use - `kubectl delete sts vault --cascade=orphan` then re-apply (memory - pattern for VCT swaps). -- Per-pod rolling: delete pod + PVCs, controller recreates with new - template. Auto-unseal sidecar handles unseal; raft `retry_join` - rejoins cluster. -- 24h validation window between pods. Migrate non-leader pods first; - step-down current leader before migrating it last. -- Backup target (`vault-backup-host` on NFS) stays on NFS. - -## Risks and rollbacks - -### Immich PG - -- pg_dumpall captures schema + data, not file-level state. Vector - index versions matter (vchord 0.3.0 unchanged; vector 0.8.0 → - 0.8.1 is a minor automatic bump on `CREATE EXTENSION` — confirmed - benign). Rollback: revert `claim_name`, scale apps; old NFS PVC - retained for 7 days post-migration. - -### Vault Raft - -- Cluster keeps quorum from 2 standby replicas while one pod is - swapped. Migrating the leader last avoids quorum churn. -- Recovery anchor: pre-migration `vault operator raft snapshot save` - + nightly `vault-raft-backup` CronJob. RTO < 1h via snapshot - restore. 
- -## Helm `securityContext.pod` replace-not-merge (Vault, discovered during execution) - -The Vault helm chart sets pod-level securityContext defaults -(`fsGroup=1000, runAsGroup=1000, runAsUser=100, runAsNonRoot=true`) -from chart templates, not from values.yaml. When `main.tf` provided -its own `server.statefulSet.securityContext.pod = {fsGroupChangePolicy -= "OnRootMismatch"}` the helm rendering REPLACED the chart defaults -rather than merging into them. On NFS this was harmless (`async, -insecure` exports made the volume world-writable enough for any UID), -but on a fresh ext4 LV via Proxmox CSI the volume root is `root:root` -and vault user (UID 100) cannot open `/vault/data/vault.db`. - -vault-1 and vault-2 happened to be Running with the correct -securityContext because their pod specs were written into etcd -**before** the customization landed; helm chart upgrades don't -restart pods, so the broken values lay dormant until vault-0 was -recreated by the orphan-deleted STS during this migration. - -Resolution: provide all five fields (`fsGroup`, `fsGroupChangePolicy`, -`runAsGroup`, `runAsUser`, `runAsNonRoot`) explicitly in main.tf so -`runAsGroup=1000` etc. survive future chart bumps. Idempotent on -both fresh PVCs and existing pods. - -## Init container chicken-and-egg (Immich PG, discovered during execution) - -The pre-existing `write-pg-override-conf` init container on the -Immich PG deployment writes `postgresql.override.conf` directly to -`PGDATA`. On a populated NFS PVC this was a no-op (init was already -run). On the fresh encrypted PVC, the file made `initdb` refuse the -non-empty directory and the pod CrashLoopBackOff'd. - -Resolution: gate the init container on `PG_VERSION` presence — first -boot skips the override write, PG `initdb`s cleanly; force a pod -restart and the second boot writes the override and PG loads -`vchord` / `vectors` / `pg_prewarm` before the dump restore. 
Change -is permanent and idempotent (correct on both fresh and initialised -PVCs). One restart pre-migration only. - -## Verification - -End-to-end DONE when: - -- `kubectl get pvc -A | grep nfs-proxmox` returns only the - `vault-backup-host` PVC (or zero, if backup PVC moves elsewhere). -- `vault operator raft list-peers` shows 3 voters on - `proxmox-lvm-encrypted`, leader elected. -- Immich PG `\dx` matches pre-migration extensions (vector minor - drift OK). -- `lvm-pvc-snapshot` captures new LVs in next 03:00 run. -- 7 consecutive days of clean backup CronJob runs and no new alerts. diff --git a/docs/plans/2026-04-25-nfs-hostile-migration-plan.md b/docs/plans/2026-04-25-nfs-hostile-migration-plan.md deleted file mode 100644 index f24c562a..00000000 --- a/docs/plans/2026-04-25-nfs-hostile-migration-plan.md +++ /dev/null @@ -1,169 +0,0 @@ -# NFS-Hostile Workload Migration — Plan - -**Date**: 2026-04-25 -**Design**: `2026-04-25-nfs-hostile-migration-design.md` -**Beads**: code-gy7h (Vault, epic), code-ahr7 (Immich PG) - -## Phase 1 — Immich PG (DONE 2026-04-25) - -| Step | Done | -|---|---| -| Snapshot extensions + row counts to `/tmp/immich-pre-migration-*` | ✓ | -| Quiesce `immich-server` + `immich-machine-learning` + `immich-frame` | ✓ | -| `pg_dumpall` → `/tmp/immich-pre-migration-.sql` (1.9 GB) | ✓ | -| Add `kubernetes_persistent_volume_claim.immich_postgresql_encrypted` (10Gi, autoresize 20Gi cap) | ✓ | -| Swap `claim_name` at `infra/stacks/immich/main.tf` deployment | ✓ | -| Patch init container to gate on `PG_VERSION` (chicken-and-egg fix) | ✓ | -| Force pod restart so override.conf gets written | ✓ | -| Restore dump | ✓ | -| `REINDEX clip_index`, `REINDEX face_index` | ✓ | -| Scale apps back up | ✓ | -| Verify: `\dx`, row counts (~111k assets), HTTP 200 internal/external | ✓ | -| LV present on PVE host (`vm-9999-pvc-...`) | ✓ | - -### Phase 1 follow-ups (not blocking) - -- Old NFS PVC `immich-postgresql-data-host` retained 7 days for - rollback. 
After 2026-05-02: remove `module.nfs_postgresql_host` - from `infra/stacks/immich/main.tf` and the CronJob's reference. -- Backup CronJob (`postgresql-backup`) still writes to the NFS - module. After cleanup, point it at a dedicated backup PVC or to - the existing `immich-backups` NFS share. - -## Phase 2 — Vault Raft (DONE 2026-04-25) - -**Phase 2 complete 2026-04-25; all 3 voters on `proxmox-lvm-encrypted`.** - -### Pre-flight (T-0) — DONE 2026-04-25 15:50 UTC - -- [x] Verify all 3 vault pods sealed=false, raft healthy. -- [x] Take fresh `vault operator raft snapshot save` (anchor saved at - `/tmp/vault-pre-migration-20260425-155029.snap`, 1.5 MB). -- [ ] Optional: scale ESO to 0 — skipped (auto-unseal sidecar is - independent; ESO refresh churn is non-disruptive for one swap). -- [x] Confirmed leader is **vault-2** → migrate vault-0 first - (non-leader), vault-1 next, vault-2 last (with step-down). - Plan originally assumed vault-0 was leader; same intent - (non-leader first). -- [x] Thin pool headroom: 54.63% used, plenty for 6 × 2 GiB LVs. - -### Step 0 — Helm values + StatefulSet swap — DONE 2026-04-25 16:08 UTC - -- [x] Edit `infra/stacks/vault/main.tf`: change - `dataStorage.storageClass` and `auditStorage.storageClass` - from `nfs-proxmox` → `proxmox-lvm-encrypted`. -- [x] `kubectl -n vault delete sts vault --cascade=orphan` (StatefulSet - `volumeClaimTemplates` is immutable; orphan keeps pods+PVCs - alive while we recreate the controller with the new template). -- [x] `tg apply -target=helm_release.vault` → recreates STS with new - VCT (full-stack `tg plan` blocks on unrelated for_each-with- - apply-time-keys errors at lines 848/865/909/917; targeted - apply on the helm release alone is the right scope here). - Existing pods still on old NFS PVCs. 
- -### Step 1 — Roll vault-0 first (non-leader) — DONE 2026-04-25 16:18 UTC - -- [x] `kubectl -n vault delete pod vault-0 --grace-period=30` -- [x] `kubectl -n vault delete pvc data-vault-0 audit-vault-0` -- [x] STS controller recreated pod; new PVCs auto-provisioned on - `proxmox-lvm-encrypted` (LVs `vm-9999-pvc-fb732fd7-...` data - 4.12%, `vm-9999-pvc-36451f42-...` audit 3.99%). -- [x] **Hit and fixed**: vault-0 CrashLoopBackOff'd with - `permission denied` on `/vault/data/vault.db`. The helm chart's - `statefulSet.securityContext.pod` block in main.tf only set - `fsGroupChangePolicy`, replacing (not merging) the chart's - defaults `fsGroup=1000, runAsGroup=1000, runAsUser=100, - runAsNonRoot=true`. NFS exports made the missing fsGroup a - no-op; ext4 LV needs it to chown the volume root for the - vault user. Old vault-1/vault-2 pods were created before that - block was added so they still had the chart-default - securityContext from their original spec. Fix: provide all - five fields explicitly in main.tf and re-apply. Same root - cause will affect vault-1 and vault-2 swaps unless this stays - in place. -- [x] Wait Ready; auto-unseal sidecar unsealed; `retry_join` rejoined - raft cluster. -- [x] Verify: `vault operator raft list-peers` shows 3 voters, - vault-0 follower, leader=vault-2. External HTTPS 200. - -### Step 2 — 24h soak (SKIPPED per user direction 2026-04-25) - -User instructed "continue with all the remaining actions" — soak -gates compressed to per-pod settle windows + raft-state verification -between rollings. No Raft alarms, no Vault errors observed at each -verification gate. - -### Step 3 — Roll vault-1 — DONE 2026-04-25 - -- [x] Force-finalize PVCs to break re-mount race: - `kubectl -n vault patch pvc data-vault-1 audit-vault-1 -p '{"metadata":{"finalizers":null}}' --type=merge`. - (Initial pod-then-PVC delete recreated pod on the OLD NFS PVCs - because pvc-protection finalizer hadn't cleared. Lesson learned - and applied to vault-2 below.) 
-- [x] Pod recreated on encrypted PVCs; auto-unsealed; rejoined raft. - -### Step 4 — Settle window — DONE 2026-04-25 - -3-check verification over 90s; raft index advancing (2730010→2730012), -all 3 voters healthy. - -### Step 5 — Roll vault-2 (leader) — DONE 2026-04-25 - -- [x] `vault operator step-down` on vault-2; vault-0 took leadership. - Confirmed vault-0 active, vault-1+vault-2 standby before delete. -- [x] Snapshot anchor at `/tmp/vault-pre-vault2.snap` (1.5 MB) from new - leader vault-0. -- [x] Force-finalize + delete PVCs + delete pod (lesson from vault-1). -- [x] Pod recreated on encrypted PVCs; auto-unsealed; rejoined raft. -- [x] `vault operator raft list-peers` shows 3 voters all healthy on - encrypted storage; leader vault-0. - -### Step 6 — Cleanup — DONE 2026-04-25 - -- [x] `kubectl get pvc -A` cross-cluster shows zero PVCs on - `nfs-proxmox` SC (only Released PVs remain → Phase 3). -- [x] Removed inline `kubernetes_storage_class.nfs_proxmox` from - `infra/stacks/vault/main.tf` (was lines 29–42). -- [x] All 3 PVC pairs on `proxmox-lvm-encrypted`. -- [x] `vault operator raft autopilot state` healthy=true. -- [x] External `https://vault.viktorbarzin.me/v1/sys/health` = 200. - -## Phase 3 — Released-PV cleanup (FOLLOW-UP) - -### Step 3.1 — vault Released PVs — DONE 2026-04-25 - -6 vault NFS PVs (Released, `nfs-proxmox` SC, Retain policy) deleted -along with their NFS subdirectories on PVE host (~1.5 GB reclaimed): - -| PV | Claim | Size on disk | -|---|---|---| -| pvc-004a5d3b-… | data-vault-2 | 45M | -| pvc-808a78ec-… | audit-vault-1 | 1.4M | -| pvc-918ee7c1-… | audit-vault-0 | 3.2M | -| pvc-9d2ddcb4-… | data-vault-0 | 46M | -| pvc-a659711d-… | data-vault-1 | 46M | -| pvc-d2e65109-… | audit-vault-2 | 1.4G | - -Procedure: `kubectl delete pv PV_NAME` (cluster object only — Retain -policy means CSI never touches NFS) then `rm -rf /srv/nfs/PV_SUBDIR` on -192.168.1.127 (one PV subdirectory at a time — never the bare -`/srv/nfs/` root). 
- -### Step 3.2 — Cluster-wide Released PV sweep (DEFERRED) - -~50 other Released PVs persist across the cluster (~200 GiB on -`proxmox-lvm` and `proxmox-lvm-encrypted`). Out of scope for the -2026-04-25 NFS-hostile session per user direction. To reclaim: - -1. List Released PVs, confirm LV exists on PVE. -2. `kubectl delete pv PV_NAME` (removes the cluster object only; with - the `Retain` reclaim policy the CSI driver leaves the underlying LV - in place). -3. If the LV is still present: manual `lvremove pve/vm-9999-pvc-UUID`. - -## Rollback - -| Phase | Trigger | Action | -|---|---|---| -| 1 | Immich UI broken / data loss | Revert `claim_name`; restore from `/tmp/immich-pre-migration-*.sql` to old NFS PVC | -| 2 (mid-rolling) | Single pod broken | Delete the encrypted PVC; recreate with NFS SC explicitly; cluster keeps quorum from 2 healthy pods | -| 2 (post-rolling, raft corrupt) | Cluster-wide failure | `vault operator raft snapshot restore SNAPSHOT_FILE` | -| Catastrophic | All Vault data lost | Restore from latest `/srv/nfs/vault-backup/` snapshot via CronJob output | diff --git a/docs/post-mortems/2026-04-19-registry-orphan-index.md b/docs/post-mortems/2026-04-19-registry-orphan-index.md index ee596c63..da883760 100644 --- a/docs/post-mortems/2026-04-19-registry-orphan-index.md +++ b/docs/post-mortems/2026-04-19-registry-orphan-index.md @@ -190,57 +190,3 @@ unaddressed. - **Runbook**: `docs/runbooks/registry-rebuild-image.md` (new). - **Hot-fix commits**: `a05d63ee`, `6371e75e`, `c113be4d`. - **Upstream bug class**: `distribution/distribution#3324`. - -## 2026-04-19 — Bulk cleanup sweep (beads code-8hk + code-jh3c) - -Same failure class, broader scope. The `registry-integrity-probe` -surfaced 38 broken manifest references persisting after the 04-19 -infra-ci fix. `beads-dispatcher` + `beads-reaper` CronJobs were stuck -`ImagePullBackOff` on `claude-agent-service:0c24c9b6` for >6h. 
All 34 -affected `repo:tag` pairs were OCI indexes whose `linux/amd64` child -manifests were absent from blob storage (same orphan pattern). - -**Action taken**: -1. Bumped `beads-server/main.tf` var default `claude_agent_service_image_tag` - from `0c24c9b6` → `2fd7670d` (the canonical tag in - `claude-agent-service/main.tf`), reused — same image already healthy - on the registry. `scripts/tg apply` on `beads-server`. Deleted the - stuck Jobs so new CronJob ticks could fire. -2. Enumerated 34 broken `(repo, tag, parent_digest)` triples via HTTP - probe using `registry-probe-credentials` K8s Secret. Deleted each - via `DELETE /v2//manifests/` (33× 202, 1× 404 — - claude-agent-service:latest pointed at an already-deleted digest). -3. Ran `docker exec registry-private /bin/registry garbage-collect - /etc/docker/registry/config.yml` — reclaimed ~3GB of orphan blob - storage. -4. Rebuilt the 3 in-use broken tags (all 3 OCI-index parents pointed - at missing children, so no cached copies would survive pod - reschedule): - - `freedify:latest` / `freedify:c803de02` — built on registry VM - directly (no CI pipeline exists for this image; python FastAPI). - - `beadboard:17a38e43` / `beadboard:latest` — GHA - `workflow_dispatch` failed at registry login (missing - `REGISTRY_USERNAME`/`REGISTRY_PASSWORD` GH secrets). Built on - registry VM directly as the fallback. GitHub secret gap is a - follow-up — beads `code-8hk` notes it. - - `priority-pass-backend:ae1420a0` / `priority-pass-frontend:ae1420a0` - — Woodpecker pipeline #8 on repo 81. Pipeline `kubectl set image`'d - the Deployment to `ae1420a0` (drift vs TF `v5`/`v8` defaults, but - that drift is pre-existing, not introduced by this cleanup). - - `wealthfolio-sync:latest` — **not rebuilt**. Monthly CronJob (next - run 2026-05-01), no source tree or CI pipeline available in the - monorepo; deferred for separate follow-up. - -**Post-cleanup state**: -- Probe: 39 tags, 0 failures. `registry_manifest_integrity_failures{} = 0`. 
-- Alert `RegistryManifestIntegrityFailure` cleared (was firing for - 5h 32m). -- No `ImagePullBackOff` pods anywhere in the cluster. -- 28 of 34 deleted manifests were **dangling tags not referenced by any - workload** — old `382d6b1*`, `v2`-`v7`, `yt-fallback`, etc. Safe - deletes, no rebuilds needed. - -**Permanent fix still in flight**: Phase 2/3 of this post-mortem -(post-push verification in CI, atomic `cleanup-tags.sh`) — not -addressed by this cleanup. The probe continues to be the -authoritative detector. diff --git a/docs/post-mortems/2026-04-22-vault-raft-leader-deadlock.md b/docs/post-mortems/2026-04-22-vault-raft-leader-deadlock.md deleted file mode 100644 index dcbb8e02..00000000 --- a/docs/post-mortems/2026-04-22-vault-raft-leader-deadlock.md +++ /dev/null @@ -1,155 +0,0 @@ -# Post-Mortem: Vault Raft Leader Deadlock + NFS Kernel Client Corruption Cascade - -> **Resolution status (2026-04-25):** Resolved structurally by code-gy7h -> migration. All 3 vault voters now on `proxmox-lvm-encrypted` block -> storage; the NFS fsync incompatibility that triggered the original -> raft hang is no longer reachable. See -> `docs/plans/2026-04-25-nfs-hostile-migration-plan.md` Phase 2. - -| Field | Value | -|-------|-------| -| **Date** | 2026-04-22 | -| **Duration** | External endpoint 503 from ~09:00 UTC to ~11:43 UTC (~2h 43m). vault-2 became active leader 11:43:28 UTC. | -| **Severity** | SEV1 (Vault — single source of secrets for 40+ services) | -| **Affected Services** | All ESO-backed services (password rotation paused). CronJobs that read plan-time secrets (14 stacks). Woodpecker CI (blocked pipeline `d39770b3`). Everything with `ExternalSecret` refresh interval ≤ 2h. | -| **Status** | Vault HA operational with vault-0 + vault-2 quorum. vault-1 still stuck ContainerCreating on node2 (third node2 reboot pending; workload can accept 2/3 quorum). Terraform fix committed as `2f1f9107`; apply pending. 
| - -## Summary - -A Vault raft leader (`vault-2`) entered a stuck goroutine state where its cluster port (8201) accepted TCP but never completed msgpack RPC. Standbys could not detect leader death because the TCP layer looked healthy, so no re-election fired. The only recovery was to kill the leader. During recovery, abrupt `kubectl delete --force` of the stuck Vault pods left kernel-side NFS client state on k8s-node1/node3/node4 in a corrupted state — **all new NFS mounts from those nodes timed out at 110s**, while existing mounts kept working. This created a cascade: the stuck leader blocked quorum, killing the leader broke NFS on the destination node for the recreated pod, force-killing the stuck pods left zombie `containerd-shim` processes kubelet couldn't clean up, and the resulting volume-manager loops pegged kubelet into 2-minute timeouts. Recovery required a VM hard-reset for node2 and node3 (kubelet was zombie on both). vault-0 remains down pending node4 reboot. - -## Impact - -- **User-facing**: `vault.viktorbarzin.me` returned HTTP 503 for ~2h. Any service that needed a Vault token during that window was degraded; Woodpecker CI pipeline blocked. -- **Blast radius**: 3/3 Vault pods affected (raft deadlock blocked re-election even with standbys up). Three k8s nodes degraded simultaneously with kernel NFS client stuck state (node1, node3, node4). Two nodes required VM hard-reset to recover kubelet (node2, node3). -- **Duration**: Degraded ~2h; resolution required sequential hard reboots. -- **Data loss**: None. Raft data integrity preserved on NFS. vault-1 came up with index 2475732, caught up to 2476009+ once leader was elected. -- **Observability gap**: No alert fired for the stuck raft leader. Standbys report `HA Mode: standby, Active Node Address: ` as if healthy even when leader is hung. - -## Timeline (UTC) - -| Time | Event | -|------|-------| -| **~09:00** | `vault-2` (original raft leader) enters hung state — port 8201 open but msgpack RPCs hang. 
Its own logs go silent. Standbys continue heartbeat/appendEntries with `msgpack decode error [pos 0]: i/o timeout`. Neither standby triggers re-election because raft transport does not distinguish "TCP open + silent" from "TCP open + healthy". | -| **~09:15** | External endpoint starts serving 503. Woodpecker CI pipeline `d39770b3` blocks waiting for Vault. | -| **09:59** | Operator force-deletes `vault-2` pod — replacement comes up on node3 and enters candidate loop (term=32), cannot get quorum because DNS for `vault-0` is NXDOMAIN (ContainerCreating) and vault-1 does not respond (its raft goroutine also hung). | -| **10:07** | Operator force-deletes `vault-1` — new `vault-1` gets scheduled to node2. Its raft would be fine, but kubelet on node2 hangs in the pod cleanup path for the old pod's NFS mount. Concurrently, a new `vault-0` pod is attempted on node4, but **NFS mount from node4 times out at 110s** — the host kernel NFS client is in a degraded state that blocks all new mounts (including to completely different NFS paths like `/srv/nfs/ytdlp`). | -| **10:09** | Diagnostic test: from node1 and node4 CSI pods, `mount -t nfs -o nfsvers=4 192.168.1.127:/srv/nfs/ytdlp /tmp/test` times out. From node2 and node3 the same mount succeeds. NFS server is healthy (`showmount -e` works; `rpcinfo` shows all programs registered). The common factor on the broken nodes: they had a force-terminated Vault pod earlier in the session, leaving stuck `mount.nfs` processes in D-state. | -| **10:18** | Manual unmount of stale NFS mount from the force-deleted old vault-0 pod on node4. New mount attempts from CSI still time out — clearing the old mount did not recover kernel NFS client state. | -| **10:22** | Workaround discovered: mounting with `nfsvers=4.0` or `nfsvers=4.1` (instead of default `nfsvers=4` which negotiates to 4.2) succeeds on broken nodes. Confirms the stuck state is version-specific (NFSv4.2 session state), not a general NFS issue. 
Decision: rather than change CSI mount options cluster-wide (risk of remounting existing 48+ PVs), fix the nodes directly. | -| **10:31** | Investigated node2 kubelet state: old `vault-1` container shows `vault` process in **Z (zombie)** state with its `sh` wrapper stuck in `do_wait` in kernel (`zap_pid_ns_processes`). Containerd-shim PID killed manually — `sh` and zombie reparented to init but remained stuck (uninterruptible kernel wait tied to NFS). | -| **10:34** | Attempted `systemctl restart kubelet` on node2 — kubelet itself went into Z (zombie) with 2 tasks still attached. Classic NFS-related kernel deadlock. | -| **10:42** | **Decision: hard-reset node2 VM** (`qm reset 202`). Disruption: 22 pods evicted. | -| **10:43** | node2 back up (Ready). CSI registered. New `vault-1` scheduled to node2. NFS mount succeeded (fresh kernel state). Kubelet began chowning volume — **extremely slow, ~3 files per minute over NFS**. | -| **10:48** | `vault-1` (2/2 Running) unsealed. **Raft leader elected: `vault-2` wins term 32, election tally=2** (vault-1 voted yes once it came up, vault-0 unreachable). However vault-2's vault-layer (HA active/standby) never transitioned to active — raft leader with `active_time: 0001-01-01T00:00:00Z` and `/sys/ha-status` returning 500. | -| **10:50** | Restarted `vault-2` pod to force clean leader transition. New `vault-2` stuck in chown loop on node3 (same pattern as node2 earlier). | -| **10:54** | Patched the Vault `StatefulSet` with `fsGroupChangePolicy: OnRootMismatch` so subsequent recreations skip the recursive chown. | -| **10:57** | Force-deleted `vault-2` and `06fa940b` pod directory on node3. New pod spawned but kubelet again stuck on phantom state from the old pod. | -| **11:01** | **Hard-reset node3 VM** (`qm reset 203`). | -| **11:03** | First 200 response: vault-1 elected leader, vault-2 standby. 
Premature celebration — vault-1's audit log on node2 NFS starts timing out; `/sys/ha-status` returns 500 even though raft thinks vault-1 is active. | -| **~11:18** | Service regresses. `vault-1` audit writes hanging (`event not processed by enough 'sink' nodes, context deadline exceeded`). Readiness probe fails; pod goes 1/2; `vault-active` endpoint stays pointed at vault-1's IP but backend unresponsive → 503. | -| **11:22** | Force-restart `vault-1` to trigger re-election with new pod. Delete + containerd-shim cleanup leaves yet another zombie on node2. Same pattern: force-delete → zombie. | -| **11:29** | **Hard-reset node4 VM** (`qm reset 204`). Rationale: vault-0 was still blocked there; 74 pods on node4 contribute to NFS server load (load avg 16 on PVE). After reboot, vault-0 mounts its PVCs on fresh kernel state and comes up 2/2 Running 11:31. | -| **11:31** | Increased PVE NFS threads from 16 to 64 (`echo 64 > /proc/fs/nfsd/threads`). Did not help immediate mount failures — the stuck state is per-client kernel, not server capacity. | -| **11:38** | Discover DNS resolution issue: vault-2's Go resolver returns NXDOMAIN for short names `vault-0.vault-internal` even though glibc resolver works. CoreDNS restart issued earlier didn't fix. Restart vault-2 pod to force fresh resolver state. | -| **11:42** | **Second hard-reset of node3 VM** (`qm reset 203`). Kubelet+CSI re-register; vault-2 scheduled, NFS mounts finally succeed on fresh kernel state. | -| **11:43:28** | **vault-2 becomes active leader.** External endpoint returns 200 and stays there. vault-0 follower, catches up to index 2477632+. vault-1 still stuck on node2; left for later recovery. 
| - -## Root Cause Chain - -``` -[1] Vault-2 raft goroutine hang (root cause — upstream Vault bug or infra-induced) - └─> Cluster port 8201 accepts TCP but never responds to msgpack RPCs - └─> Standbys' appendEntries calls return `msgpack decode error [pos 0]: i/o timeout` - └─> Raft protocol: no re-election because leader is heartbeating at the TCP level - └─> External endpoint returns 503 because HA layer has no active leader - -[2] Recovery complication — abrupt pod termination - └─> `kubectl delete --force --grace-period=0` on vault-0/1/2 - └─> containerd-shim fails to kill container cleanly (NFS I/O in D-state) - └─> vault process ends as zombie; sh wrapper stuck in do_wait - └─> Kubelet retries forever, cannot tear down old pod volumes - └─> NFS-CSI unmount requests succeed at the NFS layer but kubelet's - volume state-machine never marks the volume as unmounted - (stale 0000-mode mount directory blocks teardown completion) - -[3] Kernel NFS client corruption on node1/node4 - └─> Force-terminated Vault pod left stuck `mount.nfs` processes in D-state - └─> Kernel NFS4.2 client session state corrupted (held open mount slot) - └─> All subsequent mount syscalls for nfsvers=4 block 110s+ waiting for - session slot that will never be freed - └─> Manual workaround: nfsvers=4.1 bypasses the corrupted session state - -[4] Kubelet starvation - └─> Combination of (2) and (3) means kubelet is stuck in a 2-minute volume-setup - context deadline loop — each iteration times out, new iteration restarts, - infinite loop - └─> Hard VM reset is the only exit - └─> After reset, kubelet starts clean, CSI re-registers, mounts succeed - -[5] Slow recursive chown amplifies impact - └─> Default fsGroupChangePolicy: Always (Vault Helm chart 0.29.1 default) - └─> Kubelet walks every file on NFS setting gid=1000 - └─> Over a 1GB audit log and a 47MB raft.db on NFS with timeo=30,retrans=3, - each chown syscall takes seconds; kubelet 2-minute deadline runs out - before the walk finishes - 
└─> Loop never exits even when ownership is already correct -``` - -## Why This Failed - -1. **Raft transport does not detect stuck leaders.** If TCP is open and the process is alive enough to hold the port, standbys assume the leader is healthy. A stuck goroutine that never responds to RPCs appears to raft as "leader with high RTT" and does not trigger re-election. This is an upstream Vault bug (or at least a missing liveness check). - -2. **Abrupt pod termination + NFS = kernel-level zombie.** When a Vault pod holding an NFS mount is force-killed before it cleanly closes file handles, the kernel's NFS4.2 client session state enters a corrupted state. This blocks all new mounts from that node — not just to the same NFS path, but to ANY NFS path on the same server. The fix is a kernel reboot; there is no userspace recovery. - -3. **Vault data on NFS violates the documented rule.** `infra/.claude/CLAUDE.md` explicitly states: *"Critical services MUST NOT use NFS storage — circular dependency risk."* Vault currently uses `nfs-proxmox` for both `dataStorage` and `auditStorage`. If Vault had been on `proxmox-lvm-encrypted`, none of the NFS corruption cascade would have happened. - -4. **fsGroupChangePolicy: Always is the Helm default.** Every pod restart walks every file over NFS. On a 1GB audit log with degraded NFS RTT, this takes longer than kubelet's internal 2-minute deadline, causing infinite restart loops. `OnRootMismatch` makes chown a no-op when the root is already correct (which it always is after first setup). - -5. **No alert for this failure mode.** Prometheus alerts exist for `VaultSealed`, `VaultDown` (`up` metric), and backup staleness, but none for "raft leader has been running without advancing commit index" or "standby reports leader but leader's `/sys/ha-status` returns 500". - -## Remediation (Applied) - -- [x] Hard-reset node2 and node3 VMs to clear kernel NFS state and kubelet zombies. 
-- [x] Manually patched live `StatefulSet vault/vault` with `fsGroupChangePolicy: OnRootMismatch` to stop the chown loop. -- [x] Lazy-unmounted stale NFS mounts from force-deleted pod directories on node2 and node3. -- [x] Removed stale kubelet pod directories (`/var/lib/kubelet/pods/`) that had 0000-mode mount subdirectories blocking teardown. -- [x] Updated `stacks/vault/main.tf` with the `fsGroupChangePolicy` setting so the next `scripts/tg apply vault` makes it durable. - -## Remediation (Pending) - -- [ ] **Hard-reset node4** to recover vault-0 (same NFS kernel corruption pattern). -- [ ] **Run `scripts/tg apply` on the vault stack** to persist the fsGroupChangePolicy change. -- [ ] **Add Prometheus alert `VaultRaftLeaderStuck`** — fire when `vault_raft_last_index_gauge` (or derivation from `vault_runtime_total_gc_runs`) stops advancing for >2 minutes while `vault_core_active` is 1. -- [ ] **Add Prometheus alert `VaultHAStatusUnavailable`** — fire when `vault_core_active{}` reports 0 across all pods but `up{job="vault"}` reports 1 (HA layer broken but pods alive). -- [ ] **Migrate Vault to `proxmox-lvm-encrypted` block storage** — eliminates the entire NFS failure class. This follows the rule already documented in `infra/.claude/CLAUDE.md`. Tracked as beads task (open after Dolt is back up; currently down on node4). -- [ ] **Consider raising kubelet volume-manager deadline** for large-volume chown scenarios, or document the `fsGroupChangePolicy: OnRootMismatch` requirement for all NFS-backed StatefulSets. -- [ ] **Runbook**: `docs/runbooks/vault-raft-leader-deadlock.md` — how to detect stuck leader, safe force-restart procedure that avoids zombie pods, NFS kernel state recovery. - -## Contributing Factors - -1. **NFS mount options use bare `nfsvers=4`**. This negotiates to the highest version the server supports (NFSv4.2). When 4.2 session state corrupts, mounts fail; 4.1 works. 
Pinning to `nfsvers=4.1` in the `nfs-proxmox` StorageClass would make the failure mode recoverable without node reboot, but would also require recreating 48+ existing PVs (volumeAttributes are immutable). Deferred. - -2. **`kubectl delete --force` is the default for stuck pods**. Operators reach for force-delete when a pod won't terminate, but this leaves containerd in an inconsistent state when the underlying storage is hung. Better approach: identify the stuck process (typically `mount.nfs` or a kernel NFS callback) and fix the root cause before force-deleting. - -3. **Beads / Dolt server was on node4**, so beads task tracking went offline during this incident and couldn't be used to log progress cross-session. - -4. **node1 was cordoned mid-incident** to prevent rescheduling to a node with confirmed NFS issues, but this reduced the scheduling surface for anti-affinity-sensitive StatefulSets. - -## Learnings - -1. **NFS for stateful critical services is structurally unsafe.** When NFS breaks, the recovery involves killing pods → which can break NFS further → until a reboot. The rule exists for a reason; Vault should never have been on NFS. - -2. **Raft liveness needs application-layer probing, not TCP.** Every time we've seen a "stuck leader" issue in the homelab, TCP was fine and the app was unresponsive. A lightweight RPC probe with a short timeout and Prometheus alert would catch this in minutes instead of hours. - -3. **kubelet volume-manager is fragile against stuck NFS.** Once kubelet enters a chown loop with a context deadline shorter than the chown duration, it cannot make progress — even when the filesystem is otherwise healthy. `OnRootMismatch` is effectively mandatory for any pod with `fsGroup` and a volume >100MB. - -4. **VM hard-reset is cheap but disruptive.** The two reboots took ~60 seconds each but evicted 22+44 = 66 pods. Doing this twice in one session is a lot of churn. 
A post-mortem-driven improvement: pre-prepare "hot-standby" capacity so we can cordon+drain instead of hard-reset when kubelet zombies appear. - -5. **Documentation of this rule is worth more than the rule itself.** The CLAUDE.md already says "critical services must not use NFS". The vault stack violates it. The rule without enforcement (validation, linting, CI) is ignored during the rush to ship. - -## References - -- Related: `docs/post-mortems/2026-04-14-nfs-fsid0-dns-vault-outage.md` — previous Vault+NFS incident (different root cause, similar blast pattern). -- Vault helm chart 0.29.1 default `fsGroupChangePolicy` is unset (behaves as `Always`). -- Upstream Vault HA layer: raft leader → vault-active transition is in `vault/external_tests/raft`. Stuck goroutine pattern not documented as a known issue. diff --git a/docs/runbooks/vault-raft-leader-deadlock.md b/docs/runbooks/vault-raft-leader-deadlock.md deleted file mode 100644 index 5b4f1ece..00000000 --- a/docs/runbooks/vault-raft-leader-deadlock.md +++ /dev/null @@ -1,217 +0,0 @@ -# Runbook: Vault Raft Leader Deadlock + Safe Pod Restart - -Captures the 2026-04-22 incident pattern. When a Vault raft leader enters a -stuck goroutine state (port 8201 accepts TCP but RPCs never return), the -recovery is *not* `kubectl delete --force`. Force-deleting a Vault pod that -holds a stuck NFS mount leaves kernel NFS client state corrupted, which -blocks all subsequent NFS mounts from the node and usually requires a VM -hard-reset to clear. - -**Related**: [post-mortems/2026-04-22-vault-raft-leader-deadlock.md](../post-mortems/2026-04-22-vault-raft-leader-deadlock.md). - -## Symptoms - -- `https://vault.viktorbarzin.me/v1/sys/health` returns HTTP 503. -- Standbys log `msgpack decode error [pos 0]: i/o timeout` every 2s. -- `kubectl exec` into a standby shows raft thinks the leader is alive - (peers list all `Voter`, leader address populated) but `vault operator - raft autopilot state` stalls or errors. 
-- The "leader" pod's logs go silent — no heartbeats, no audit writes, - nothing. TCP on 8201 still accepts connections. -- ESO-backed secrets stop refreshing (ExternalSecret `SecretSyncedError`). -- Woodpecker CI pipelines that read from Vault at plan time hang. - -## 0. Confirm the diagnosis (before touching anything) - -Don't jump to force-delete. Verify the leader is actually stuck, not just -slow: - -```sh -# 1. Who does raft think the leader is? -kubectl exec -n vault vault-0 -c vault -- vault status 2>&1 | \ - grep -E 'HA Mode|Active Node|Leader|Raft' - -# 2. Is the leader's port open but unresponsive? -LEADER_POD=vault-2 # or whichever vault status reports -kubectl exec -n vault $LEADER_POD -c vault -- sh -c \ - 'timeout 3 nc -zv 127.0.0.1 8200 2>&1; echo; timeout 3 vault status' - -# 3. Is the active vault service pointing at a real pod? -kubectl get endpoints -n vault vault-active -o yaml | \ - grep -E 'addresses|notReadyAddresses' -A2 - -# 4. What do standby logs say? -kubectl logs -n vault vault-0 -c vault --tail=40 | grep -iE 'msgpack|decode|rpc' -``` - -If (2) hangs and (4) shows repeated msgpack errors → stuck leader. - -## 1. Identify the stuck pod precisely - -```sh -# Find the pod whose vault_core_active would be 1 if it were scraping -# (currently no telemetry — use logs as proxy until telemetry is enabled). -for p in vault-0 vault-1 vault-2; do - echo "=== $p ===" - kubectl logs -n vault $p -c vault --tail=5 2>&1 | head -5 -done | grep -B1 'no recent output' -``` - -The pod whose logs have been silent for minutes while the others are -actively erroring is the stuck leader. - -## 2. The safe restart sequence (avoids zombie containers) - -**DO NOT** `kubectl delete pod --force --grace-period=0` as the first -step. On NFS-backed Vault that's the exact move that leaves the kernel -NFS client corrupted on the node where the stuck pod ran. - -Instead: - -### 2a. 
Graceful delete first (30s grace) - -```sh -kubectl delete pod -n vault vault-2 -``` - -Wait 30 seconds. Most of the time the TERM → SIGKILL path works and the -new pod schedules cleanly. The remaining leaders re-elect and the external -endpoint recovers. - -### 2b. If the pod is Terminating after 60s, find the stuck process - -```sh -NODE=$(kubectl get pod -n vault vault-2- -o jsonpath='{.spec.nodeName}') -POD_UID=$(kubectl get pod -n vault vault-2- -o jsonpath='{.metadata.uid}') - -ssh $NODE "sudo ps auxf | grep -A2 $POD_UID | head -20" -# Look for: mount.nfs (D-state), vault (Z-state), or the sh wrapper in do_wait -``` - -### 2c. Unmount stale NFS before force-deleting - -If the old pod's NFS mount is still present, lazy-unmount it FIRST so -the kernel can release NFS session state cleanly: - -```sh -ssh $NODE "sudo mount | grep $POD_UID | awk '{print \$3}' | xargs -I{} sudo umount -l {}" -``` - -Verify no mount.nfs processes are in D-state on the node: - -```sh -ssh $NODE "ps -eo state,pid,comm | grep '^D' | head -5" -``` - -### 2d. Only NOW force-delete if needed - -```sh -kubectl delete pod -n vault vault-2- --force --grace-period=0 -``` - -## 3. Recovery when the node is already stuck - -If you force-deleted before reading this runbook and NFS is now broken -on the node: - -**Diagnostic — confirm NFS client state is corrupted:** - -```sh -NODE=k8s-node2 # node where the force-delete happened -ssh $NODE "sudo mkdir -p /tmp/nfstest && sudo timeout 30 \ - mount -t nfs 192.168.1.127:/srv/nfs /tmp/nfstest && echo MOUNT_OK" -``` - -If the mount times out at 30-110s, kernel NFS client state is stuck. -No userspace recovery exists — only a VM reboot clears it. - -**Workaround before rebooting**: mounting with `nfsvers=4.1` succeeds -on broken nodes (the corruption is NFSv4.2 session-state specific). -This is useful for diagnostic mounts, but does NOT fix CSI pods — -their mount options come from the `nfs-proxmox` StorageClass and can't -be overridden per-pod. 
- -**Reboot the affected node VM:** - -```sh -# Find PVE VM ID — nodes numbered 201-204 for k8s-node1..4 -# (VMID = 200 + node number; substitute it for VMID below) -ssh root@192.168.1.127 "qm reset VMID" - -# If qm reset leaves the VM PID unchanged (it didn't actually reboot), -# use qm stop/start: -ssh root@192.168.1.127 "qm stop VMID && qm start VMID" -``` - -Wait for the node to become Ready (`kubectl get node k8s-nodeN -w`) -and CSI driver to register (`kubectl get pods -n nfs-csi -o wide`). - -**Gotcha — `qm reset` can be a no-op.** On the 2026-04-22 incident, -`qm reset 201` returned exit 0 but did NOT restart the VM (same QEMU PID -before and after). `qm status` reported "running" throughout. Always -verify by checking the QEMU PID or VM uptime post-reset. If uptime is -unchanged, escalate to `qm stop && qm start`. - -**Gotcha — check boot order before stop/start.** Long-running VMs -(630+ day uptime) may have stale `bootdisk:` config that's been hidden -by never rebooting. On 2026-04-22, k8s-node1's config had `bootdisk: -scsi0` but the actual OS disk was on `scsi1`, so the first boot after -stop attempted iPXE and failed. Before stopping, verify: - -```sh -ssh root@192.168.1.127 "grep -E 'boot|scsi[0-9]+:' /etc/pve/qemu-server/VMID.conf" -``` - -If `bootdisk` references a disk ID that doesn't exist, fix it first -with `qm set VMID --boot "order=scsiN"` (use the ID of the main -OS disk). - -## 4. Prevent re-infection — the chown loop - -After the node comes back, the vault pod's PV chown walk can still -peg kubelet. The durable fix is in `stacks/vault/main.tf`: - -```hcl -statefulSet = { - securityContext = { - pod = { - fsGroupChangePolicy = "OnRootMismatch" - } - } -} -``` - -This was applied in commit `2f1f9107` (2026-04-22). If you find -yourself editing this in a kubectl patch for live recovery, follow -up with a Terraform apply the same session — leaving the cluster -ahead of Terraform state is technical debt that re-triggers on the -next apply. - -## 5. 
Verify end-to-end - -```sh -# External endpoint — the user-facing health check -curl -sk -o /dev/null -w "%{http_code}\n" https://vault.viktorbarzin.me/v1/sys/health -# expect: 200 - -# Raft peers (needs VAULT_TOKEN with operator capability) -kubectl exec -n vault vault-0 -c vault -- vault operator raft list-peers - -# All pods 2/2 -kubectl get pods -n vault -l app.kubernetes.io/name=vault -o wide - -# No alerts fired (once VaultRaftLeaderStuck + VaultHAStatusUnavailable are live) -curl -s https://alertmanager.viktorbarzin.me/api/v2/alerts | \ - jq '.[] | select(.labels.alertname | test("Vault"))' -``` - -## Known limitations - -- **No alert for stuck leaders yet.** `VaultRaftLeaderStuck` and - `VaultHAStatusUnavailable` require Vault telemetry enabled - (`telemetry { unauthenticated_metrics_access = true }`) and a - scrape job. Alerts are defined in `prometheus_chart_values.tpl` - but stay silent until telemetry lands — tracked as a beads task. -- **Vault on NFS violates the documented rule.** `infra/.claude/CLAUDE.md` - says critical services must use `proxmox-lvm-encrypted`. The - `dataStorage`/`auditStorage` still use `nfs-proxmox`. Migration - tracked as an epic-level beads task. 
diff --git a/scripts/cluster_healthcheck.sh b/scripts/cluster_healthcheck.sh index b5237378..997c0b7d 100755 --- a/scripts/cluster_healthcheck.sh +++ b/scripts/cluster_healthcheck.sh @@ -1242,17 +1242,9 @@ check_overcommit() { HA_CACHE_DIR="" ha_sofia_available() { - if [[ -z "${HOME_ASSISTANT_SOFIA_URL:-}" ]]; then - export HOME_ASSISTANT_SOFIA_URL="https://ha-sofia.viktorbarzin.me" + if [[ -z "${HOME_ASSISTANT_SOFIA_URL:-}" ]] || [[ -z "${HOME_ASSISTANT_SOFIA_TOKEN:-}" ]]; then + return 1 fi - if [[ -z "${HOME_ASSISTANT_SOFIA_TOKEN:-}" ]]; then - if command -v vault >/dev/null 2>&1 && [[ -n "${VAULT_TOKEN:-}${HOME:-}" ]]; then - local t - t=$(vault kv get -field=haos_api_token secret/viktor 2>/dev/null || true) - [[ -n "$t" ]] && export HOME_ASSISTANT_SOFIA_TOKEN="$t" - fi - fi - [[ -n "${HOME_ASSISTANT_SOFIA_TOKEN:-}" ]] || return 1 return 0 } @@ -1760,25 +1752,14 @@ else: json_add "hardware_exporters" "$status" "${detail:-All healthy}" } -# Returns 0 if cert-manager CRDs are installed, 1 otherwise. -cert_manager_installed() { - $KUBECTL get crd certificates.cert-manager.io -o name >/dev/null 2>&1 -} - # --- 31. cert-manager: Certificate Readiness --- check_cert_manager_certificates() { section 31 "cert-manager — Certificate Readiness" local certs not_ready detail="" status="PASS" - if ! cert_manager_installed; then - pass "cert-manager not installed — N/A" - json_add "certmanager_certificates" "PASS" "N/A (cert-manager not installed)" - return 0 - fi - certs=$($KUBECTL get certificates.cert-manager.io -A -o json 2>/dev/null) || { - warn "cert-manager CRDs installed but API query failed" - json_add "certmanager_certificates" "WARN" "API query failed" + warn "cert-manager CRDs not installed or inaccessible" + json_add "certmanager_certificates" "WARN" "CRDs unavailable" return 0 } @@ -1816,15 +1797,9 @@ check_cert_manager_expiry() { section 32 "cert-manager — Certificate Expiry (<14d)" local certs expiring detail="" status="PASS" - if ! 
cert_manager_installed; then - pass "cert-manager not installed — N/A" - json_add "certmanager_expiry" "PASS" "N/A (cert-manager not installed)" - return 0 - fi - certs=$($KUBECTL get certificates.cert-manager.io -A -o json 2>/dev/null) || { - warn "cert-manager CRDs installed but API query failed" - json_add "certmanager_expiry" "WARN" "API query failed" + warn "cert-manager CRDs not installed or inaccessible" + json_add "certmanager_expiry" "WARN" "CRDs unavailable" return 0 } @@ -1877,15 +1852,9 @@ check_cert_manager_requests() { section 33 "cert-manager — Failed CertificateRequests" local requests failed detail="" status="PASS" - if ! cert_manager_installed; then - pass "cert-manager not installed — N/A" - json_add "certmanager_requests" "PASS" "N/A (cert-manager not installed)" - return 0 - fi - requests=$($KUBECTL get certificaterequests.cert-manager.io -A -o json 2>/dev/null) || { - warn "cert-manager CRDs installed but API query failed" - json_add "certmanager_requests" "WARN" "API query failed" + warn "cert-manager CRDs not installed or inaccessible" + json_add "certmanager_requests" "WARN" "CRDs unavailable" return 0 } @@ -2029,7 +1998,7 @@ check_backup_lvm_snapshots() { local snap_output detail="" status="PASS" snap_output=$(ssh -o BatchMode=yes -o ConnectTimeout=5 -o StrictHostKeyChecking=no \ - root@192.168.1.127 "lvs -o lv_name,lv_time --noheadings 2>/dev/null | grep _snap" 2>/dev/null || true) + root@192.168.1.127 "lvs -o lv_name,lv_time --noheadings 2>/dev/null | grep -- -snap" 2>/dev/null || true) if [[ -z "$snap_output" ]]; then [[ "$QUIET" == true ]] && section_always 36 "Backup Freshness — LVM PVC Snapshots" diff --git a/scripts/lvm-pvc-snapshot.sh b/scripts/lvm-pvc-snapshot.sh deleted file mode 100755 index 6ec5dc34..00000000 --- a/scripts/lvm-pvc-snapshot.sh +++ /dev/null @@ -1,469 +0,0 @@ -#!/usr/bin/env bash -# lvm-pvc-snapshot — LVM thin snapshot management for Proxmox CSI PVCs -# Deploy to PVE host at /usr/local/bin/lvm-pvc-snapshot -set 
-euo pipefail - -# --- Configuration --- -VG="pve" -THINPOOL="data" -SNAP_SUFFIX_FORMAT="%Y%m%d_%H%M" -RETENTION_DAYS=7 -MIN_FREE_PCT=10 -PUSHGATEWAY="${LVM_SNAP_PUSHGATEWAY:-http://10.0.20.100:30091}" -PUSHGATEWAY_JOB="lvm-pvc-snapshot" -LOCKFILE="/run/lvm-pvc-snapshot.lock" -KUBECONFIG="${KUBECONFIG:-/root/.kube/config}" -export KUBECONFIG - -# Namespaces to exclude from snapshots (high-churn, have app-level dumps) -# These PVCs cause significant CoW write amplification (~36% overhead) -EXCLUDE_NAMESPACES="${LVM_SNAP_EXCLUDE_NS:-dbaas,monitoring}" - -# --- Logging --- -log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*"; } -warn() { log "WARN: $*" >&2; } -die() { log "FATAL: $*" >&2; exit 1; } - -# --- Helpers --- - -get_thinpool_free_pct() { - local data_pct - data_pct=$(lvs --noheadings --nosuffix -o data_percent "${VG}/${THINPOOL}" 2>/dev/null | tr -d ' ') - echo "scale=2; 100 - ${data_pct}" | bc -} - -build_exclude_lv_list() { - # Query K8s for PVs in excluded namespaces, extract their LV names - if [[ -z "${EXCLUDE_NAMESPACES}" ]] || ! 
command -v kubectl &>/dev/null; then - return - fi - kubectl get pv -o json 2>/dev/null | jq -r --arg ns "${EXCLUDE_NAMESPACES}" ' - ($ns | split(",")) as $excl | - .items[] | - select(.spec.csi.driver == "csi.proxmox.sinextra.dev") | - select(.spec.claimRef.namespace as $n | $excl | index($n)) | - .spec.csi.volumeHandle | split("/") | last - ' 2>/dev/null || true -} - -discover_pvc_lvs() { - # List thin LVs matching PVC pattern, excluding snapshots, pre-restore backups, - # and LVs belonging to excluded namespaces (high-churn databases/metrics) - local all_lvs exclude_lvs - all_lvs=$(lvs --noheadings -o lv_name,pool_lv "${VG}" 2>/dev/null \ - | awk -v pool="${THINPOOL}" '$2 == pool { print $1 }' \ - | grep -E '^vm-[0-9]+-pvc-' \ - | grep -v '_snap_' \ - | grep -v '_pre_restore_') - - exclude_lvs=$(build_exclude_lv_list) - - if [[ -n "${exclude_lvs}" ]]; then - # Filter out excluded LVs - local exclude_pattern - exclude_pattern=$(echo "${exclude_lvs}" | paste -sd'|' -) - echo "${all_lvs}" | grep -vE "(${exclude_pattern})" || true - else - echo "${all_lvs}" - fi -} - -list_snapshots() { - lvs --noheadings -o lv_name,pool_lv "${VG}" 2>/dev/null \ - | awk -v pool="${THINPOOL}" '$2 == pool { print $1 }' \ - | grep '_snap_' || true -} - -parse_snap_timestamp() { - # Extract YYYYMMDD_HHMM from snapshot name, convert to epoch - local snap_name="$1" - local ts_str - ts_str=$(echo "${snap_name}" | grep -oE '[0-9]{8}_[0-9]{4}$') - if [[ -z "${ts_str}" ]]; then - echo "0" - return - fi - local ymd="${ts_str:0:8}" - local hm="${ts_str:9:4}" - date -d "${ymd:0:4}-${ymd:4:2}-${ymd:6:2} ${hm:0:2}:${hm:2:2}" +%s 2>/dev/null || echo "0" -} - -get_original_lv_from_snap() { - # vm-200-pvc-abc_snap_20260403_1200 -> vm-200-pvc-abc - echo "$1" | sed 's/_snap_[0-9]\{8\}_[0-9]\{4\}$//' -} - -push_metrics() { - local status="$1" created="$2" failed="$3" pruned="$4" - local free_pct - free_pct=$(get_thinpool_free_pct) - - cat </dev/null || warn "Failed to push metrics to Pushgateway" -# 
HELP lvm_snapshot_last_run_timestamp Unix timestamp of last snapshot run -# TYPE lvm_snapshot_last_run_timestamp gauge -lvm_snapshot_last_run_timestamp $(date +%s) -# HELP lvm_snapshot_last_status Exit status (0=success, 1=partial failure, 2=aborted) -# TYPE lvm_snapshot_last_status gauge -lvm_snapshot_last_status ${status} -# HELP lvm_snapshot_created_total Number of snapshots created in last run -# TYPE lvm_snapshot_created_total gauge -lvm_snapshot_created_total ${created} -# HELP lvm_snapshot_failed_total Number of snapshot failures in last run -# TYPE lvm_snapshot_failed_total gauge -lvm_snapshot_failed_total ${failed} -# HELP lvm_snapshot_pruned_total Number of snapshots pruned in last run -# TYPE lvm_snapshot_pruned_total gauge -lvm_snapshot_pruned_total ${pruned} -# HELP lvm_snapshot_thinpool_free_pct Thin pool free percentage -# TYPE lvm_snapshot_thinpool_free_pct gauge -lvm_snapshot_thinpool_free_pct ${free_pct} -METRICS -} - -# --- Subcommands --- - -cmd_snapshot() { - log "Starting PVC LVM thin snapshot run" - - # Check thin pool free space - local free_pct - free_pct=$(get_thinpool_free_pct) - log "Thin pool free space: ${free_pct}%" - if (( $(echo "${free_pct} < ${MIN_FREE_PCT}" | bc -l) )); then - warn "Thin pool has only ${free_pct}% free (minimum: ${MIN_FREE_PCT}%). Aborting." 
- push_metrics 2 0 0 0 - exit 1 - fi - - # Discover PVC LVs - local lvs_list - lvs_list=$(discover_pvc_lvs) - if [[ -z "${lvs_list}" ]]; then - warn "No PVC LVs found matching pattern" - push_metrics 2 0 0 0 - exit 1 - fi - - local count=0 failed=0 total - total=$(echo "${lvs_list}" | wc -l | tr -d ' ') - local snap_ts - snap_ts=$(date +"${SNAP_SUFFIX_FORMAT}") - - log "Found ${total} PVC LVs to snapshot" - - while IFS= read -r lv; do - local snap_name="${lv}_snap_${snap_ts}" - if lvcreate -s -kn -n "${snap_name}" "${VG}/${lv}" >/dev/null 2>&1; then - log " Created: ${snap_name}" - count=$((count + 1)) - else - warn " Failed to create snapshot for ${lv}" - failed=$((failed + 1)) - fi - done <<< "${lvs_list}" - - log "Snapshot run complete: ${count} created, ${failed} failed out of ${total}" - - # Auto-prune - log "Running auto-prune..." - local pruned - pruned=$(cmd_prune_count) - - # Determine status - local status=0 - if (( failed > 0 && count > 0 )); then - status=1 # partial - elif (( failed > 0 && count == 0 )); then - status=2 # all failed - fi - - push_metrics "${status}" "${count}" "${failed}" "${pruned}" - log "Done" -} - -cmd_list() { - printf "%-45s %-50s %8s %8s\n" "ORIGINAL LV" "SNAPSHOT" "AGE" "DATA%" - printf "%-45s %-50s %8s %8s\n" "-----------" "--------" "---" "-----" - - local now - now=$(date +%s) - - local snap_lines - snap_lines=$(lvs --noheadings --nosuffix -o lv_name,lv_size,data_percent "${VG}" 2>/dev/null \ - | grep -E '_snap_|_pre_restore_' || true) - - if [[ -z "${snap_lines}" ]]; then - echo "(no snapshots found)" - return - fi - - echo "${snap_lines}" | while read -r name size data_pct; do - local original age_str ts epoch - if [[ "${name}" == *"_pre_restore_"* ]]; then - original=$(echo "${name}" | sed 's/_pre_restore_[0-9]\{8\}_[0-9]\{4\}$//') - ts=$(echo "${name}" | grep -oE '[0-9]{8}_[0-9]{4}$') - else - original=$(get_original_lv_from_snap "${name}") - ts=$(echo "${name}" | grep -oE '[0-9]{8}_[0-9]{4}$') - fi - 
epoch=$(parse_snap_timestamp "${name}") - if (( epoch > 0 )); then - local age_s=$(( now - epoch )) - local days=$(( age_s / 86400 )) - local hours=$(( (age_s % 86400) / 3600 )) - age_str="${days}d${hours}h" - else - age_str="unknown" - fi - printf "%-45s %-50s %8s %7s%%\n" "${original}" "${name}" "${age_str}" "${data_pct}" - done -} - -cmd_prune() { - local pruned - pruned=$(cmd_prune_count) - log "Pruned ${pruned} expired snapshots" -} - -cmd_prune_count() { - # NOTE: stdout of this function is captured by callers (`pruned=$(cmd_prune_count)`), - # so all log/warn output must go to stderr — the only thing on stdout is the count. - local now cutoff pruned=0 - now=$(date +%s) - cutoff=$(( now - RETENTION_DAYS * 86400 )) - - local snaps - snaps=$(lvs --noheadings -o lv_name,pool_lv "${VG}" 2>/dev/null \ - | awk -v pool="${THINPOOL}" '$2 == pool { print $1 }' \ - | grep -E '_snap_|_pre_restore_' || true) - - if [[ -z "${snaps}" ]]; then - echo "0" - return - fi - - while IFS= read -r snap; do - local epoch - epoch=$(parse_snap_timestamp "${snap}") - if (( epoch > 0 && epoch < cutoff )); then - if lvremove -f "${VG}/${snap}" >/dev/null 2>&1; then - log " Pruned: ${snap}" >&2 - pruned=$((pruned + 1)) - else - warn " Failed to prune: ${snap}" - fi - fi - done <<< "${snaps}" - - echo "${pruned}" -} - -cmd_restore() { - local pvc_lv="${1:-}" snapshot_lv="${2:-}" - - if [[ -z "${pvc_lv}" || -z "${snapshot_lv}" ]]; then - die "Usage: $0 restore " - fi - - # Validate LVs exist - if ! lvs "${VG}/${pvc_lv}" >/dev/null 2>&1; then - die "PVC LV '${pvc_lv}' not found in VG '${VG}'" - fi - if ! lvs "${VG}/${snapshot_lv}" >/dev/null 2>&1; then - die "Snapshot LV '${snapshot_lv}' not found in VG '${VG}'" - fi - - # Discover K8s context - log "Discovering Kubernetes context for LV '${pvc_lv}'..." 
- - local volume_handle="local-lvm:${pvc_lv}" - local pv_info - pv_info=$(kubectl get pv -o json 2>/dev/null | jq -r \ - --arg vh "${volume_handle}" \ - '.items[] | select(.spec.csi.volumeHandle == $vh) | "\(.metadata.name) \(.spec.claimRef.namespace) \(.spec.claimRef.name)"' \ - ) || die "Failed to query PVs (is kubectl configured?)" - - if [[ -z "${pv_info}" ]]; then - die "No PV found with volumeHandle '${volume_handle}'" - fi - - local pv_name pvc_ns pvc_name - read -r pv_name pvc_ns pvc_name <<< "${pv_info}" - log "Found: PV=${pv_name}, PVC=${pvc_ns}/${pvc_name}" - - # Find the workload (Deployment or StatefulSet) that uses this PVC - local workload_type="" workload_name="" original_replicas="" - - # Check StatefulSets first (databases use these) - local sts_info - sts_info=$(kubectl get statefulset -n "${pvc_ns}" -o json 2>/dev/null | jq -r \ - --arg pvc "${pvc_name}" \ - '.items[] | select( - (.spec.template.spec.volumes // [] | .[].persistentVolumeClaim.claimName == $pvc) or - (.spec.volumeClaimTemplates // [] | .[].metadata.name as $vct | - .spec.replicas as $r | range($r) | "\($vct)-\(.metadata.name)-\(.)" ) == $pvc - ) | "\(.metadata.name) \(.spec.replicas)"' 2>/dev/null \ - ) || true - - # If not found via simple volume check, try matching VCT naming pattern - if [[ -z "${sts_info}" ]]; then - sts_info=$(kubectl get statefulset -n "${pvc_ns}" -o json 2>/dev/null | jq -r \ - --arg pvc "${pvc_name}" \ - '.items[] | .metadata.name as $sts | .spec.replicas as $r | - select(.spec.volumeClaimTemplates != null) | - .spec.volumeClaimTemplates[].metadata.name as $vct | - [range($r)] | map("\($vct)-\($sts)-\(.)") | - if any(. 
== $pvc) then "\($sts) \($r)" else empty end' 2>/dev/null \ - ) || true - fi - - if [[ -n "${sts_info}" ]]; then - read -r workload_name original_replicas <<< "${sts_info}" - workload_type="statefulset" - else - # Check Deployments - local deploy_info - deploy_info=$(kubectl get deployment -n "${pvc_ns}" -o json 2>/dev/null | jq -r \ - --arg pvc "${pvc_name}" \ - '.items[] | select( - .spec.template.spec.volumes // [] | .[].persistentVolumeClaim.claimName == $pvc - ) | "\(.metadata.name) \(.spec.replicas)"' 2>/dev/null \ - ) || true - - if [[ -n "${deploy_info}" ]]; then - read -r workload_name original_replicas <<< "${deploy_info}" - workload_type="deployment" - fi - fi - - if [[ -z "${workload_type}" ]]; then - warn "Could not auto-discover workload for PVC '${pvc_name}' in namespace '${pvc_ns}'." - warn "You may need to scale down the pod manually." - echo "" - read -rp "Continue with LV swap anyway? (yes/no): " confirm - [[ "${confirm}" == "yes" ]] || die "Aborted by user" - workload_type="manual" - fi - - # Dry-run output - local backup_name="${pvc_lv}_pre_restore_$(date +"${SNAP_SUFFIX_FORMAT}")" - echo "" - echo "╔══════════════════════════════════════════════════════════════╗" - echo "║ RESTORE DRY-RUN ║" - echo "╠══════════════════════════════════════════════════════════════╣" - echo "║ PVC: ${pvc_ns}/${pvc_name}" - echo "║ PV: ${pv_name}" - if [[ "${workload_type}" != "manual" ]]; then - echo "║ Workload: ${workload_type}/${workload_name} (replicas: ${original_replicas}→0→${original_replicas})" - fi - echo "║" - echo "║ Actions:" - if [[ "${workload_type}" != "manual" ]]; then - echo "║ 1. Scale ${workload_type}/${workload_name} to 0 replicas" - echo "║ 2. Wait for pod termination" - fi - echo "║ 3. Rename ${pvc_lv} → ${backup_name}" - echo "║ 4. Rename ${snapshot_lv} → ${pvc_lv}" - if [[ "${workload_type}" != "manual" ]]; then - echo "║ 5. 
Scale ${workload_type}/${workload_name} back to ${original_replicas} replicas" - fi - echo "╚══════════════════════════════════════════════════════════════╝" - echo "" - - # Interactive confirmation - read -rp "Type 'yes' to proceed with restore: " confirm - if [[ "${confirm}" != "yes" ]]; then - die "Aborted by user" - fi - - # Scale down - if [[ "${workload_type}" != "manual" ]]; then - log "Scaling ${workload_type}/${workload_name} to 0 replicas..." - kubectl scale "${workload_type}/${workload_name}" -n "${pvc_ns}" --replicas=0 - - log "Waiting for pod termination (timeout: 120s)..." - kubectl wait --for=delete pod -l "app.kubernetes.io/name=${workload_name}" -n "${pvc_ns}" --timeout=120s 2>/dev/null || \ - kubectl wait --for=delete pod -l "app=${workload_name}" -n "${pvc_ns}" --timeout=120s 2>/dev/null || \ - warn "Timeout waiting for pods — continuing anyway (LV may still be in use)" - sleep 5 # extra grace period for device detach - fi - - # Verify LV is not active - local lv_active - lv_active=$(lvs --noheadings -o lv_active "${VG}/${pvc_lv}" 2>/dev/null | tr -d ' ') - if [[ "${lv_active}" == "active" ]]; then - warn "LV ${pvc_lv} is still active. Attempting to deactivate..." - # Close any LUKS mapper on the LV before deactivation - if dmsetup ls 2>/dev/null | grep -q "${pvc_lv}"; then - log "Closing LUKS mapper for ${pvc_lv}..." - cryptsetup luksClose "${pvc_lv}" 2>/dev/null || true - fi - lvchange -an "${VG}/${pvc_lv}" 2>/dev/null || warn "Could not deactivate — proceeding with caution" - fi - - # LV swap - log "Renaming ${pvc_lv} → ${backup_name}" - lvrename "${VG}" "${pvc_lv}" "${backup_name}" || die "Failed to rename original LV" - - log "Renaming ${snapshot_lv} → ${pvc_lv}" - lvrename "${VG}" "${snapshot_lv}" "${pvc_lv}" || die "Failed to rename snapshot LV" - - # Scale back up - if [[ "${workload_type}" != "manual" ]]; then - log "Scaling ${workload_type}/${workload_name} back to ${original_replicas} replicas..." 
- kubectl scale "${workload_type}/${workload_name}" -n "${pvc_ns}" --replicas="${original_replicas}" - - log "Waiting for pod to become Ready (timeout: 300s)..." - kubectl wait --for=condition=Ready pod -l "app.kubernetes.io/name=${workload_name}" -n "${pvc_ns}" --timeout=300s 2>/dev/null || \ - kubectl wait --for=condition=Ready pod -l "app=${workload_name}" -n "${pvc_ns}" --timeout=300s 2>/dev/null || \ - warn "Timeout waiting for pod Ready — check manually" - fi - - echo "" - log "Restore complete!" - log "Old data preserved as: ${backup_name}" - log "To delete old data after verification: lvremove -f ${VG}/${backup_name}" -} - -# --- Main --- - -usage() { - cat < [args] - -Commands: - snapshot Create thin snapshots of all PVC LVs - list List existing snapshots with age and data% - prune Remove snapshots older than ${RETENTION_DAYS} days - restore Restore a PVC from a snapshot (interactive) - -Environment: - LVM_SNAP_PUSHGATEWAY Pushgateway URL (default: ${PUSHGATEWAY}) - KUBECONFIG Kubeconfig path (default: /root/.kube/config) -EOF -} - -main() { - local cmd="${1:-}" - shift || true - - # Acquire lock (except for list which is read-only) - if [[ "${cmd}" != "list" && "${cmd}" != "" && "${cmd}" != "help" && "${cmd}" != "--help" && "${cmd}" != "-h" ]]; then - exec 200>"${LOCKFILE}" - if ! flock -n 200; then - die "Another instance is already running (lockfile: ${LOCKFILE})" - fi - fi - - case "${cmd}" in - snapshot) cmd_snapshot ;; - list) cmd_list ;; - prune) cmd_prune ;; - restore) cmd_restore "$@" ;; - help|--help|-h|"") usage ;; - *) die "Unknown command: ${cmd}. Run '$0 help' for usage." ;; - esac -} - -main "$@" diff --git a/scripts/tg b/scripts/tg index 15cea845..8cb38e20 100755 --- a/scripts/tg +++ b/scripts/tg @@ -72,23 +72,12 @@ if [ -n "$STACK_NAME" ]; then else # Tier 1: PG backend — fetch credentials from Vault if [ -z "${PG_CONN_STR:-}" ]; then - # Pre-flight: vault CLI must be available. 
Previously CI failed with a - # misleading "Cannot read PG credentials" message because the Alpine CI - # image lacked the vault binary — the 2>/dev/null below swallowed the - # real "vault: not found" error. Fail fast with a clear message instead. - if ! command -v vault >/dev/null 2>&1; then - echo "ERROR: vault CLI not found on PATH. Install it or use an image that includes it (ci/Dockerfile)." >&2 - exit 1 - fi - VAULT_OUT=$(vault read -format=json database/static-creds/pg-terraform-state 2>&1) || { - echo "ERROR: Cannot read PG credentials from Vault. Vault output follows:" >&2 - echo "$VAULT_OUT" >&2 - echo "" >&2 - echo "Hint: humans run 'vault login -method=oidc'; CI auths via K8s SA (role=ci)." >&2 + PG_CREDS=$(vault read -format=json database/static-creds/pg-terraform-state 2>/dev/null) || { + echo "ERROR: Cannot read PG credentials from Vault. Run: vault login -method=oidc" >&2 exit 1 } - PG_USER=$(echo "$VAULT_OUT" | jq -r .data.username) - PG_PASS=$(echo "$VAULT_OUT" | jq -r .data.password) + PG_USER=$(echo "$PG_CREDS" | jq -r .data.username) + PG_PASS=$(echo "$PG_CREDS" | jq -r .data.password) export PG_CONN_STR="postgres://${PG_USER}:${PG_PASS}@10.0.20.200:5432/terraform_state?sslmode=disable" fi fi diff --git a/secrets/fullchain.pem b/secrets/fullchain.pem index e4bc0d60..be5b1a00 100644 Binary files a/secrets/fullchain.pem and b/secrets/fullchain.pem differ diff --git a/secrets/privkey.pem b/secrets/privkey.pem index 1f38edfe..b488a0d6 100644 Binary files a/secrets/privkey.pem and b/secrets/privkey.pem differ diff --git a/stacks/beads-server/main.tf b/stacks/beads-server/main.tf index e11b0ac7..01f75ff4 100644 --- a/stacks/beads-server/main.tf +++ b/stacks/beads-server/main.tf @@ -14,7 +14,7 @@ variable "beadboard_image_tag" { # already ships. variable "claude_agent_service_image_tag" { type = string - default = "2fd7670d" + default = "0c24c9b6" } # Kill switch for auto-dispatch. When false, both CronJobs are suspended. 
The diff --git a/stacks/broker-sync/main.tf b/stacks/broker-sync/main.tf index 7c99a916..b3c71905 100644 --- a/stacks/broker-sync/main.tf +++ b/stacks/broker-sync/main.tf @@ -105,7 +105,7 @@ resource "kubernetes_cron_job_v1" "version_probe" { metadata {} spec { backoff_limit = 1 - ttl_seconds_after_finished = 86400 + ttl_seconds_after_finished = 300 template { metadata { labels = { app = "broker-sync", component = "version-probe" } @@ -246,12 +246,7 @@ resource "kubernetes_cron_job_v1" "imap" { concurrency_policy = "Forbid" successful_jobs_history_limit = 3 failed_jobs_history_limit = 5 - # Unsuspended 2026-04-19 for RSU vest ground-truth ingestion — the parser - # now detects Schwab Release Confirmations and scaffolds VestEvents; the - # postgres sink that persists them into payslip_ingest.rsu_vest_events is - # pending a real-email fixture and cross-service DB grant (see - # follow-up beads task filed under the RSU tax spike fix epic). - suspend = false + suspend = true # enable in Phase 2 job_template { metadata {} spec { diff --git a/stacks/crowdsec/modules/crowdsec/main.tf b/stacks/crowdsec/modules/crowdsec/main.tf index ca7b1998..cf59ea47 100644 --- a/stacks/crowdsec/modules/crowdsec/main.tf +++ b/stacks/crowdsec/modules/crowdsec/main.tf @@ -96,21 +96,6 @@ resource "kubernetes_config_map" "crowdsec_whitelist" { reason: "Trusted IP - never block" ip: - "176.12.22.76" - --- - name: viktor/immich-asset-paths-whitelist - description: "Don't penalise legit Immich timeline bursts (mobile scrub, web grid)" - whitelist: - reason: "Immich asset endpoints are auth-gated; mobile scrub legitimately bursts" - expression: - - > - evt.Parsed.target_fqdn == "immich.viktorbarzin.me" && - (evt.Parsed.request startsWith "/api/assets/" || - evt.Parsed.request startsWith "/api/timeline/" || - evt.Parsed.request startsWith "/api/asset/" || - evt.Parsed.request startsWith "/api/search/" || - evt.Parsed.request startsWith "/api/memories" || - evt.Parsed.request startsWith 
"/api/albums" || - evt.Parsed.request startsWith "/api/activities") YAML } } diff --git a/stacks/dbaas/modules/dbaas/main.tf b/stacks/dbaas/modules/dbaas/main.tf index 1ae6f415..8389aa93 100644 --- a/stacks/dbaas/modules/dbaas/main.tf +++ b/stacks/dbaas/modules/dbaas/main.tf @@ -157,9 +157,9 @@ resource "kubernetes_stateful_set_v1" "mysql_standalone" { required_during_scheduling_ignored_during_execution { node_selector_term { match_expressions { - key = "nvidia.com/gpu.present" + key = "kubernetes.io/hostname" operator = "NotIn" - values = ["true"] + values = ["k8s-node1"] } } } @@ -1209,61 +1209,6 @@ resource "null_resource" "pg_job_hunter_db" { } } -# Create wealthfolio_sync database for the SQLite→PG ETL sidecar that mirrors -# Wealthfolio's daily_account_valuation/accounts/activities into PG so Grafana -# can chart net worth, contributions, and growth. -# Role password is managed by Vault Database Secrets Engine (static role `pg-wealthfolio-sync`, 7d rotation). -resource "null_resource" "pg_wealthfolio_sync_db" { - depends_on = [null_resource.pg_cluster] - - triggers = { - db_name = "wealthfolio_sync" - username = "wealthfolio_sync" - } - - provisioner "local-exec" { - command = <<-EOT - PRIMARY=$(kubectl --kubeconfig ${var.kube_config_path} get cluster -n dbaas pg-cluster -o jsonpath='{.status.currentPrimary}') - kubectl --kubeconfig ${var.kube_config_path} exec -n dbaas $PRIMARY -c postgres -- \ - bash -c ' - psql -U postgres -tc "SELECT 1 FROM pg_catalog.pg_roles WHERE rolname = '"'"'wealthfolio_sync'"'"'" | grep -q 1 || \ - psql -U postgres -c "CREATE ROLE wealthfolio_sync WITH LOGIN PASSWORD '"'"'changeme-vault-will-rotate'"'"'" - psql -U postgres -tc "SELECT 1 FROM pg_catalog.pg_database WHERE datname = '"'"'wealthfolio_sync'"'"'" | grep -q 1 || \ - psql -U postgres -c "CREATE DATABASE wealthfolio_sync OWNER wealthfolio_sync" - psql -U postgres -c "GRANT ALL PRIVILEGES ON DATABASE wealthfolio_sync TO wealthfolio_sync" - ' - EOT - } -} - -# Create 
fire_planner database for the FIRE retirement-planning service. -# Role password is managed by Vault Database Secrets Engine -# (static role `pg-fire-planner`, 7d rotation). -# fire_planner reads from payslip_ingest + wealthfolio_sync (read-only) -# and writes its own MC results into schema fire_planner. -resource "null_resource" "pg_fire_planner_db" { - depends_on = [null_resource.pg_cluster] - - triggers = { - db_name = "fire_planner" - username = "fire_planner" - } - - provisioner "local-exec" { - command = <<-EOT - PRIMARY=$(kubectl --kubeconfig ${var.kube_config_path} get cluster -n dbaas pg-cluster -o jsonpath='{.status.currentPrimary}') - kubectl --kubeconfig ${var.kube_config_path} exec -n dbaas $PRIMARY -c postgres -- \ - bash -c ' - psql -U postgres -tc "SELECT 1 FROM pg_catalog.pg_roles WHERE rolname = '"'"'fire_planner'"'"'" | grep -q 1 || \ - psql -U postgres -c "CREATE ROLE fire_planner WITH LOGIN PASSWORD '"'"'changeme-vault-will-rotate'"'"'" - psql -U postgres -tc "SELECT 1 FROM pg_catalog.pg_database WHERE datname = '"'"'fire_planner'"'"'" | grep -q 1 || \ - psql -U postgres -c "CREATE DATABASE fire_planner OWNER fire_planner" - psql -U postgres -c "GRANT ALL PRIVILEGES ON DATABASE fire_planner TO fire_planner" - ' - EOT - } -} - # Old PostgreSQL deployment — kept commented for rollback reference # resource "kubernetes_deployment" "postgres" { # metadata { diff --git a/stacks/ebook2audiobook/main.tf b/stacks/ebook2audiobook/main.tf index 8492991f..f9871882 100644 --- a/stacks/ebook2audiobook/main.tf +++ b/stacks/ebook2audiobook/main.tf @@ -72,7 +72,7 @@ resource "kubernetes_deployment" "ebook2audiobook" { spec { node_selector = { - "nvidia.com/gpu.present" : "true" + "gpu" : "true" } toleration { key = "nvidia.com/gpu" @@ -290,7 +290,7 @@ resource "kubernetes_deployment" "audiblez" { } spec { node_selector = { - "nvidia.com/gpu.present" : "true" + "gpu" : "true" } toleration { key = "nvidia.com/gpu" @@ -356,7 +356,7 @@ resource 
"kubernetes_deployment" "audiblez-web" { } spec { node_selector = { - "nvidia.com/gpu.present" : "true" + "gpu" : "true" } toleration { key = "nvidia.com/gpu" diff --git a/stacks/fire-planner/main.tf b/stacks/fire-planner/main.tf deleted file mode 100644 index 09e1177b..00000000 --- a/stacks/fire-planner/main.tf +++ /dev/null @@ -1,383 +0,0 @@ -variable "image_tag" { - type = string - default = "latest" - description = "fire-planner image tag. Use 8-char git SHA in CI; :latest only for local trials." -} - -variable "postgresql_host" { type = string } - -locals { - namespace = "fire-planner" - image = "registry.viktorbarzin.me/fire-planner:${var.image_tag}" - labels = { - app = "fire-planner" - } -} - -resource "kubernetes_namespace" "fire_planner" { - metadata { - name = local.namespace - labels = { - tier = local.tiers.aux - "istio-injection" = "disabled" - } - } - lifecycle { - # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps - # this label on every namespace. - ignore_changes = [metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"]] - } -} - -# App secrets — the recompute-API bearer token (manual seed in Vault). 
-# Seed before applying: -# secret/fire-planner -> property `recompute_bearer_token` -resource "kubernetes_manifest" "external_secret" { - manifest = { - apiVersion = "external-secrets.io/v1beta1" - kind = "ExternalSecret" - metadata = { - name = "fire-planner-secrets" - namespace = local.namespace - } - spec = { - refreshInterval = "15m" - secretStoreRef = { - name = "vault-kv" - kind = "ClusterSecretStore" - } - target = { - name = "fire-planner-secrets" - template = { - metadata = { - annotations = { - "reloader.stakater.com/match" = "true" - } - } - } - } - data = [ - { - secretKey = "RECOMPUTE_BEARER_TOKEN" - remoteRef = { - key = "fire-planner" - property = "recompute_bearer_token" - } - }, - ] - } - } - depends_on = [kubernetes_namespace.fire_planner] -} - -# DB credentials from Vault database engine (rotated every 7 days). -# Template builds the asyncpg DSN consumed by the FastAPI app + CronJob -# as DB_CONNECTION_STRING. -resource "kubernetes_manifest" "db_external_secret" { - manifest = { - apiVersion = "external-secrets.io/v1beta1" - kind = "ExternalSecret" - metadata = { - name = "fire-planner-db-creds" - namespace = local.namespace - } - spec = { - refreshInterval = "15m" - secretStoreRef = { - name = "vault-database" - kind = "ClusterSecretStore" - } - target = { - name = "fire-planner-db-creds" - template = { - metadata = { - annotations = { - "reloader.stakater.com/match" = "true" - } - } - data = { - DB_CONNECTION_STRING = "postgresql+asyncpg://fire_planner:{{ .password }}@${var.postgresql_host}:5432/fire_planner" - DB_PASSWORD = "{{ .password }}" - } - } - } - data = [{ - secretKey = "password" - remoteRef = { - key = "static-creds/pg-fire-planner" - property = "password" - } - }] - } - } - depends_on = [kubernetes_namespace.fire_planner] -} - -resource "kubernetes_deployment" "fire_planner" { - metadata { - name = "fire-planner" - namespace = kubernetes_namespace.fire_planner.metadata[0].name - labels = merge(local.labels, { - tier = 
local.tiers.aux - }) - annotations = { - "reloader.stakater.com/search" = "true" - } - } - - spec { - replicas = 1 - strategy { - type = "Recreate" - } - - selector { - match_labels = local.labels - } - - template { - metadata { - labels = local.labels - annotations = { - "dependency.kyverno.io/wait-for" = "postgresql.dbaas:5432" - } - } - - spec { - image_pull_secrets { - name = "registry-credentials" - } - - init_container { - name = "alembic-migrate" - image = local.image - command = ["python", "-m", "fire_planner", "migrate"] - - env_from { - secret_ref { - name = "fire-planner-db-creds" - } - } - - resources { - requests = { - cpu = "50m" - memory = "256Mi" - } - limits = { - memory = "512Mi" - } - } - } - - container { - name = "fire-planner" - image = local.image - - command = ["python", "-m", "fire_planner", "serve"] - - port { - container_port = 8080 - } - - env_from { - secret_ref { - name = "fire-planner-secrets" - } - } - env_from { - secret_ref { - name = "fire-planner-db-creds" - } - } - - readiness_probe { - http_get { - path = "/healthz" - port = 8080 - } - initial_delay_seconds = 5 - period_seconds = 10 - } - - liveness_probe { - http_get { - path = "/healthz" - port = 8080 - } - initial_delay_seconds = 5 - period_seconds = 10 - } - - resources { - requests = { - cpu = "100m" - memory = "512Mi" - } - limits = { - memory = "1024Mi" - } - } - } - } - } - } - - lifecycle { - ignore_changes = [spec[0].template[0].spec[0].dns_config] # KYVERNO_LIFECYCLE_V1 - } - - depends_on = [ - kubernetes_manifest.external_secret, - kubernetes_manifest.db_external_secret, - ] -} - -# ClusterIP-only — /recompute is cluster-internal (operator triggers -# via kubectl port-forward or ad-hoc CronJob). 
-resource "kubernetes_service" "fire_planner" { - metadata { - name = "fire-planner" - namespace = kubernetes_namespace.fire_planner.metadata[0].name - labels = local.labels - } - - spec { - type = "ClusterIP" - selector = local.labels - - port { - name = "http" - port = 8080 - target_port = 8080 - } - } -} - -# Monthly recompute on the 2nd at 09:00 UTC. Wealthfolio-sync runs on -# the 1st at 08:00, so account_snapshot is fresh by the time the -# planner picks up. -resource "kubernetes_cron_job_v1" "fire_planner_recompute" { - metadata { - name = "fire-planner-recompute" - namespace = kubernetes_namespace.fire_planner.metadata[0].name - } - spec { - schedule = "0 9 2 * *" - concurrency_policy = "Forbid" - successful_jobs_history_limit = 3 - failed_jobs_history_limit = 5 - starting_deadline_seconds = 600 - - job_template { - metadata { - labels = local.labels - } - spec { - backoff_limit = 1 - ttl_seconds_after_finished = 86400 - template { - metadata { - labels = local.labels - } - spec { - restart_policy = "OnFailure" - image_pull_secrets { - name = "registry-credentials" - } - container { - name = "recompute" - image = local.image - command = ["python", "-m", "fire_planner", "recompute-all"] - - env_from { - secret_ref { - name = "fire-planner-secrets" - } - } - env_from { - secret_ref { - name = "fire-planner-db-creds" - } - } - - resources { - requests = { - cpu = "200m" - memory = "1Gi" - } - limits = { - memory = "2Gi" - } - } - } - } - } - } - } - } - - lifecycle { - # KYVERNO_LIFECYCLE_V1 - ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config] - } - - depends_on = [ - kubernetes_manifest.external_secret, - kubernetes_manifest.db_external_secret, - ] -} - -# Plan-time read of the ESO-created K8s Secret for Grafana datasource -# password. First-apply gotcha: must -# `terragrunt apply -target=kubernetes_manifest.db_external_secret` so -# the Secret exists before this data source plans. 
-data "kubernetes_secret" "fire_planner_db_creds" { - metadata { - name = "fire-planner-db-creds" - namespace = kubernetes_namespace.fire_planner.metadata[0].name - } - depends_on = [kubernetes_manifest.db_external_secret] -} - -# Grafana datasource for fire_planner PostgreSQL DB. -# Lives in the monitoring namespace so the grafana sidecar -# (label grafana_datasource=1) picks it up. -# -# Grafana 11.2+ Postgres plugin reads the DB name from jsonData.database; -# the top-level `database` field is silently ignored by the frontend and -# triggers "you do not have default database" on every panel. -# See github.com/grafana/grafana#112418 — same fix as the payslip-ingest -# datasource (commit cc56ba29). -resource "kubernetes_config_map" "grafana_fire_planner_datasource" { - metadata { - name = "grafana-fire-planner-datasource" - namespace = "monitoring" - labels = { - grafana_datasource = "1" - } - } - data = { - "fire-planner-datasource.yaml" = yamlencode({ - apiVersion = 1 - datasources = [{ - name = "FirePlanner" - type = "postgres" - access = "proxy" - url = "${var.postgresql_host}:5432" - user = "fire_planner" - uid = "fire-planner-pg" - jsonData = { - database = "fire_planner" - sslmode = "disable" - postgresVersion = 1600 - timescaledb = false - } - secureJsonData = { - password = data.kubernetes_secret.fire_planner_db_creds.data["DB_PASSWORD"] - } - editable = true - }] - }) - } -} diff --git a/stacks/fire-planner/terragrunt.hcl b/stacks/fire-planner/terragrunt.hcl deleted file mode 100644 index c1d2e468..00000000 --- a/stacks/fire-planner/terragrunt.hcl +++ /dev/null @@ -1,28 +0,0 @@ -include "root" { - path = find_in_parent_folders() -} - -dependency "platform" { - config_path = "../platform" - skip_outputs = true -} - -dependency "vault" { - config_path = "../vault" - skip_outputs = true -} - -dependency "external-secrets" { - config_path = "../external-secrets" - skip_outputs = true -} - -dependency "dbaas" { - config_path = "../dbaas" - skip_outputs = true 
-} - -inputs = { - # fire-planner repo HEAD — bump on every deploy. - image_tag = "latest" -} diff --git a/stacks/frigate/main.tf b/stacks/frigate/main.tf index 489daa63..31079be9 100644 --- a/stacks/frigate/main.tf +++ b/stacks/frigate/main.tf @@ -87,7 +87,7 @@ resource "kubernetes_deployment" "frigate" { } spec { node_selector = { - "nvidia.com/gpu.present" : "true" + "gpu" : true } toleration { key = "nvidia.com/gpu" diff --git a/stacks/hermes-agent/main.tf b/stacks/hermes-agent/main.tf index 0881932f..89de6d6b 100644 --- a/stacks/hermes-agent/main.tf +++ b/stacks/hermes-agent/main.tf @@ -220,8 +220,7 @@ resource "kubernetes_deployment" "hermes_agent" { strategy { type = "Recreate" } - # Disabled 2026-04-22 — main container fails with "mkdir: cannot create directory '/opt/data': Permission denied" (fsGroup/runAsUser mismatch vs init container). Re-enable after fixing PVC permissions. - replicas = 0 + replicas = 1 selector { match_labels = { app = "hermes-agent" diff --git a/stacks/immich/main.tf b/stacks/immich/main.tf index 3ee56d1f..b17e7d55 100644 --- a/stacks/immich/main.tf +++ b/stacks/immich/main.tf @@ -85,30 +85,6 @@ module "nfs_postgresql_host" { nfs_path = "/srv/nfs/immich/postgresql" } -# Migrated 2026-04-25: PG live data moved off NFS to LUKS-encrypted block. -# WAL fsync per commit on NFS contributed to the 2026-04-22 NFS writeback storm -# (see post-mortems/2026-04-22-vault-raft-leader-deadlock.md). -# Backup CronJob still writes to module.nfs_postgresql_host (NFS append-only). 
-resource "kubernetes_persistent_volume_claim" "immich_postgresql_encrypted" { - wait_until_bound = false - metadata { - name = "immich-postgresql-data-encrypted" - namespace = kubernetes_namespace.immich.metadata[0].name - annotations = { - "resize.topolvm.io/threshold" = "80%" - "resize.topolvm.io/increase" = "100%" - "resize.topolvm.io/storage_limit" = "20Gi" - } - } - spec { - access_modes = ["ReadWriteOnce"] - storage_class_name = "proxmox-lvm-encrypted" - resources { - requests = { storage = "10Gi" } - } - } -} - module "nfs_ml_cache_host" { source = "../../modules/kubernetes/nfs_volume" name = "immich-ml-cache-host" @@ -188,7 +164,7 @@ resource "kubernetes_deployment" "immich_server" { } strategy { - type = "Recreate" + type = "RollingUpdate" } template { @@ -311,10 +287,10 @@ resource "kubernetes_deployment" "immich_server" { resources { requests = { cpu = "100m" - memory = "4096Mi" + memory = "2000Mi" } limits = { - memory = "4096Mi" + memory = "3500Mi" } } } @@ -486,13 +462,6 @@ resource "kubernetes_deployment" "immich-postgres" { name = "write-pg-override-conf" image = "busybox:1.36" command = ["sh", "-c", <<-EOT - # Skip write on uninitialised PGDATA — initdb refuses non-empty dirs. - # On first boot the override is absent; trigger a pod restart after - # initdb completes so the override is applied before extension load. - if [ ! 
-f /data/PG_VERSION ]; then - echo "PGDATA uninitialised, skipping override conf (will write on next pod start)" - exit 0 - fi cat > /data/postgresql.override.conf <<'PGCONF' # Immich vector search performance tuning shared_buffers = 2048MB @@ -512,7 +481,7 @@ resource "kubernetes_deployment" "immich-postgres" { volume { name = "postgresql-persistent-storage" persistent_volume_claim { - claim_name = kubernetes_persistent_volume_claim.immich_postgresql_encrypted.metadata[0].name + claim_name = module.nfs_postgresql_host.claim_name } } } @@ -579,7 +548,7 @@ resource "kubernetes_deployment" "immich-machine-learning" { } } strategy { - type = "Recreate" + type = "RollingUpdate" } template { metadata { @@ -590,7 +559,7 @@ resource "kubernetes_deployment" "immich-machine-learning" { spec { priority_class_name = "gpu-workload" node_selector = { - "nvidia.com/gpu.present" : "true" + "gpu" : "true" } toleration { key = "nvidia.com/gpu" diff --git a/stacks/job-hunter/terragrunt.hcl b/stacks/job-hunter/terragrunt.hcl index 8f4a32fb..93df44f1 100644 --- a/stacks/job-hunter/terragrunt.hcl +++ b/stacks/job-hunter/terragrunt.hcl @@ -18,9 +18,8 @@ dependency "external-secrets" { } inputs = { - # 92afc38d = master HEAD with levels.fyi scraper + comp_table COALESCE - # fix + Frankfurter FX backend (exchangerate.host free tier deprecated - # in 2026). Built + pushed locally 2026-04-19 while the Woodpecker - # Forgejo webhook remains broken. - image_tag = "92afc38d" + # 8-char SHA from the Forgejo commit viktor/job-hunter@9c42eac9 + # (first image built locally + pushed 2026-04-19 due to a Woodpecker + # v3.13 Forgejo webhook bug; bump on every deploy once CI recovers). 
+ image_tag = "48f8615d" } diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/agent/+server.ts b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/agent/+server.ts index 21405a94..f96f4d56 100644 --- a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/agent/+server.ts +++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/agent/+server.ts @@ -138,7 +138,7 @@ Kyverno auto-generates LimitRange + ResourceQuota per namespace based on tier la - **Proxmox**: 192.168.1.127 (Dell R730, 22c/44t, 142GB RAM) - **Nodes**: k8s-master (10.0.20.100), node1 (GPU, Tesla T4), node2-4 -- **GPU workloads**: \`node_selector = { "nvidia.com/gpu.present" : "true" }\` + toleration \`nvidia.com/gpu\` (label auto-applied by gpu-feature-discovery, no hostname pins) +- **GPU workloads**: \`node_selector = { "gpu": "true" }\` + toleration \`nvidia.com/gpu\` - **Pull-through cache**: 10.0.20.10 — use versioned image tags (cache serves stale :latest manifests) - **MySQL InnoDB Cluster**: 3 instances on iSCSI - **SMTP**: \`var.mail_host\` port 587 STARTTLS diff --git a/stacks/monitoring/main.tf b/stacks/monitoring/main.tf index 0c207aa0..c4961fdd 100644 --- a/stacks/monitoring/main.tf +++ b/stacks/monitoring/main.tf @@ -30,7 +30,6 @@ module "monitoring" { haos_api_token = data.vault_kv_secret_v2.secrets.data["haos_api_token"] pve_password = data.vault_kv_secret_v2.secrets.data["pve_password"] grafana_admin_password = data.vault_kv_secret_v2.secrets.data["grafana_admin_password"] - kube_config_path = var.kube_config_path registry_user = data.vault_kv_secret_v2.viktor.data["registry_user"] registry_password = data.vault_kv_secret_v2.viktor.data["registry_password"] tier = local.tiers.cluster diff --git a/stacks/monitoring/modules/monitoring/dashboards/fire-planner.json b/stacks/monitoring/modules/monitoring/dashboards/fire-planner.json deleted file mode 100644 index 9dba9e11..00000000 --- a/stacks/monitoring/modules/monitoring/dashboards/fire-planner.json +++ /dev/null 
@@ -1,226 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": {"type": "datasource", "uid": "grafana"}, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "description": "FIRE Retirement Planner — risk-adjusted, tax-minimised Monte Carlo over jurisdictions, withdrawal strategies, and UK-departure years. Backed by fire_planner schema on pg-cluster-rw.", - "editable": true, - "fiscalYearStartMonth": 0, - "id": null, - "templating": { - "list": [ - { - "name": "scenario", - "type": "query", - "label": "Scenario", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "fire-planner-pg"}, - "query": "SELECT external_id FROM fire_planner.scenario ORDER BY external_id", - "refresh": 1, - "includeAll": false, - "multi": false, - "current": {"selected": false, "text": "cyprus-vpw-leave-y3-glide-rising", "value": "cyprus-vpw-leave-y3-glide-rising"} - } - ] - }, - "links": [], - "panels": [ - { - "id": 1, - "title": "Net worth over time (real + nominal)", - "type": "timeseries", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "fire-planner-pg"}, - "gridPos": {"h": 8, "w": 24, "x": 0, "y": 0}, - "fieldConfig": { - "defaults": {"unit": "currencyGBP", "decimals": 0}, - "overrides": [] - }, - "options": {"legend": {"displayMode": "table", "showLegend": true}, "tooltip": {"mode": "multi"}}, - "targets": [ - { - "refId": "A", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "fire-planner-pg"}, - "rawQuery": true, - "editorMode": "code", - "format": "time_series", - "rawSql": "SELECT snapshot_date AS time, account_name AS metric, SUM(market_value_gbp) AS value FROM fire_planner.account_snapshot WHERE snapshot_date >= NOW() - INTERVAL '10 years' GROUP BY snapshot_date, account_name ORDER BY snapshot_date" - } - ] - }, - { - "id": 2, - "title": "Monte Carlo fan chart — selected scenario", - "type": "timeseries", - 
"datasource": {"type": "grafana-postgresql-datasource", "uid": "fire-planner-pg"}, - "gridPos": {"h": 10, "w": 24, "x": 0, "y": 8}, - "description": "P10/p25/p50/p75/p90 portfolio value across MC paths, for the scenario picked in the selector at the top.", - "fieldConfig": {"defaults": {"unit": "currencyGBP", "decimals": 0}, "overrides": []}, - "options": {"legend": {"displayMode": "table", "showLegend": true}, "tooltip": {"mode": "multi"}}, - "targets": [ - { - "refId": "A", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "fire-planner-pg"}, - "rawQuery": true, - "editorMode": "code", - "format": "time_series", - "rawSql": "SELECT (DATE_TRUNC('year', NOW()) + (year_idx || ' years')::interval) AS time, 'p10' AS metric, p10_portfolio_gbp AS value FROM fire_planner.projection_yearly p JOIN fire_planner.mc_run r ON r.id = p.mc_run_id JOIN fire_planner.scenario s ON s.id = r.scenario_id WHERE s.external_id = '$scenario' UNION ALL SELECT (DATE_TRUNC('year', NOW()) + (year_idx || ' years')::interval), 'p25', p25_portfolio_gbp FROM fire_planner.projection_yearly p JOIN fire_planner.mc_run r ON r.id = p.mc_run_id JOIN fire_planner.scenario s ON s.id = r.scenario_id WHERE s.external_id = '$scenario' UNION ALL SELECT (DATE_TRUNC('year', NOW()) + (year_idx || ' years')::interval), 'p50', p50_portfolio_gbp FROM fire_planner.projection_yearly p JOIN fire_planner.mc_run r ON r.id = p.mc_run_id JOIN fire_planner.scenario s ON s.id = r.scenario_id WHERE s.external_id = '$scenario' UNION ALL SELECT (DATE_TRUNC('year', NOW()) + (year_idx || ' years')::interval), 'p75', p75_portfolio_gbp FROM fire_planner.projection_yearly p JOIN fire_planner.mc_run r ON r.id = p.mc_run_id JOIN fire_planner.scenario s ON s.id = r.scenario_id WHERE s.external_id = '$scenario' UNION ALL SELECT (DATE_TRUNC('year', NOW()) + (year_idx || ' years')::interval), 'p90', p90_portfolio_gbp FROM fire_planner.projection_yearly p JOIN fire_planner.mc_run r ON r.id = p.mc_run_id JOIN 
fire_planner.scenario s ON s.id = r.scenario_id WHERE s.external_id = '$scenario' ORDER BY time" - } - ] - }, - { - "id": 3, - "title": "Confidence heatmap — jurisdiction × strategy", - "type": "table", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "fire-planner-pg"}, - "gridPos": {"h": 8, "w": 12, "x": 0, "y": 18}, - "description": "Median success rate by (jurisdiction, strategy), averaged across leave-UK years and glide paths.", - "fieldConfig": { - "defaults": {"custom": {"align": "left", "displayMode": "auto"}, "unit": "percentunit", "decimals": 2}, - "overrides": [] - }, - "options": {"showHeader": true}, - "targets": [ - { - "refId": "A", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "fire-planner-pg"}, - "rawQuery": true, - "editorMode": "code", - "format": "table", - "rawSql": "SELECT jurisdiction, strategy, AVG(success_rate) AS avg_success FROM fire_planner.scenario_summary GROUP BY jurisdiction, strategy ORDER BY jurisdiction, strategy" - } - ] - }, - { - "id": 4, - "title": "Median lifetime tax — by jurisdiction", - "type": "barchart", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "fire-planner-pg"}, - "gridPos": {"h": 8, "w": 12, "x": 12, "y": 18}, - "fieldConfig": {"defaults": {"unit": "currencyGBP", "decimals": 0}, "overrides": []}, - "options": {"orientation": "horizontal", "showValue": "auto", "stacking": "none", "legend": {"displayMode": "list"}}, - "targets": [ - { - "refId": "A", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "fire-planner-pg"}, - "rawQuery": true, - "editorMode": "code", - "format": "table", - "rawSql": "SELECT jurisdiction, AVG(median_lifetime_tax_gbp) AS lifetime_tax FROM fire_planner.scenario_summary GROUP BY jurisdiction ORDER BY lifetime_tax DESC" - } - ] - }, - { - "id": 5, - "title": "Withdrawal runway — years to ruin (failing paths)", - "type": "table", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "fire-planner-pg"}, - 
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 26}, - "description": "Among scenarios where some MC paths failed, the median year-to-ruin. Empty where every path survives.", - "fieldConfig": {"defaults": {"unit": "y", "decimals": 1}, "overrides": []}, - "options": {"showHeader": true}, - "targets": [ - { - "refId": "A", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "fire-planner-pg"}, - "rawQuery": true, - "editorMode": "code", - "format": "table", - "rawSql": "SELECT jurisdiction, strategy, leave_uk_year, glide_path, median_years_to_ruin FROM fire_planner.scenario_summary WHERE median_years_to_ruin IS NOT NULL ORDER BY median_years_to_ruin ASC LIMIT 20" - } - ] - }, - { - "id": 6, - "title": "Optimal leave-UK year", - "type": "stat", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "fire-planner-pg"}, - "gridPos": {"h": 4, "w": 6, "x": 12, "y": 26}, - "description": "leave_uk_year that maximises success_rate − lifetime_tax (tax in £M; small weighting).", - "fieldConfig": {"defaults": {"unit": "none"}, "overrides": []}, - "options": {"colorMode": "value", "reduceOptions": {"calcs": ["lastNotNull"]}}, - "targets": [ - { - "refId": "A", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "fire-planner-pg"}, - "rawQuery": true, - "editorMode": "code", - "format": "table", - "rawSql": "SELECT leave_uk_year FROM fire_planner.scenario_summary WHERE jurisdiction <> 'uk' ORDER BY (success_rate - median_lifetime_tax_gbp / 1000000.0) DESC LIMIT 1" - } - ] - }, - { - "id": 7, - "title": "Median ending wealth — selected scenario", - "type": "stat", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "fire-planner-pg"}, - "gridPos": {"h": 4, "w": 6, "x": 18, "y": 26}, - "fieldConfig": {"defaults": {"unit": "currencyGBP", "decimals": 0}, "overrides": []}, - "options": {"colorMode": "value", "reduceOptions": {"calcs": ["lastNotNull"]}}, - "targets": [ - { - "refId": "A", - "datasource": {"type": 
"grafana-postgresql-datasource", "uid": "fire-planner-pg"}, - "rawQuery": true, - "editorMode": "code", - "format": "table", - "rawSql": "SELECT p50_ending_gbp FROM fire_planner.scenario_summary WHERE scenario_id = (SELECT id FROM fire_planner.scenario WHERE external_id = '$scenario')" - } - ] - }, - { - "id": 8, - "title": "Success rate vs spend (UK-stay)", - "type": "barchart", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "fire-planner-pg"}, - "gridPos": {"h": 8, "w": 12, "x": 0, "y": 30}, - "description": "Sanity gauge — UK success rate by strategy, helps anchor expectations against published cFIREsim numbers.", - "fieldConfig": {"defaults": {"unit": "percentunit", "decimals": 2}, "overrides": []}, - "options": {"orientation": "horizontal", "showValue": "auto", "legend": {"displayMode": "list"}}, - "targets": [ - { - "refId": "A", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "fire-planner-pg"}, - "rawQuery": true, - "editorMode": "code", - "format": "table", - "rawSql": "SELECT strategy, AVG(success_rate) AS success FROM fire_planner.scenario_summary WHERE jurisdiction = 'uk' GROUP BY strategy ORDER BY success DESC" - } - ] - }, - { - "id": 9, - "title": "Sequence-of-returns sensitivity (top failing scenarios)", - "type": "table", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "fire-planner-pg"}, - "gridPos": {"h": 8, "w": 12, "x": 12, "y": 30}, - "description": "Pearson correlation between year-1 portfolio drawdown and overall success — strongly negative ⇒ scenario is sequence-of-returns sensitive (case for the rising-equity glide).", - "fieldConfig": {"defaults": {"unit": "none", "decimals": 4}, "overrides": []}, - "options": {"showHeader": true}, - "targets": [ - { - "refId": "A", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "fire-planner-pg"}, - "rawQuery": true, - "editorMode": "code", - "format": "table", - "rawSql": "SELECT s.external_id, r.sequence_risk_correlation, 
r.success_rate FROM fire_planner.mc_run r JOIN fire_planner.scenario s ON s.id = r.scenario_id WHERE r.id IN (SELECT MAX(id) FROM fire_planner.mc_run GROUP BY scenario_id) ORDER BY r.sequence_risk_correlation ASC LIMIT 15" - } - ] - } - ], - "schemaVersion": 39, - "tags": ["finance", "fire", "retirement", "monte-carlo"], - "title": "FIRE Planner", - "uid": "fire-planner", - "version": 1, - "weekStart": "" -} diff --git a/stacks/monitoring/modules/monitoring/dashboards/job-hunter.json b/stacks/monitoring/modules/monitoring/dashboards/job-hunter.json index 526e2fe0..d38bc40c 100644 --- a/stacks/monitoring/modules/monitoring/dashboards/job-hunter.json +++ b/stacks/monitoring/modules/monitoring/dashboards/job-hunter.json @@ -197,192 +197,12 @@ ], "title": "Top roles", "type": "table" - }, - { - "datasource": {"type": "grafana-postgresql-datasource", "uid": "job-hunter-pg"}, - "description": "Per-company median base salary broken out by seniority level (comp_points, GBP).", - "fieldConfig": { - "defaults": { - "color": {"mode": "thresholds"}, - "custom": { - "align": "auto", - "cellOptions": {"type": "auto"}, - "filterable": true, - "inspect": false - }, - "mappings": [], - "thresholds": {"mode": "absolute", "steps": []}, - "unit": "currencyGBP" - }, - "overrides": [] - }, - "gridPos": {"h": 10, "w": 24, "x": 0, "y": 29}, - "id": 6, - "options": { - "cellHeight": "sm", - "footer": {"countRows": false, "fields": "", "reducer": ["sum"], "show": false}, - "showHeader": true - }, - "targets": [ - { - "datasource": {"type": "grafana-postgresql-datasource", "uid": "job-hunter-pg"}, - "format": "table", - "rawQuery": true, - "rawSql": "SELECT c.display_name AS company, l.slug AS level, percentile_cont(0.5) WITHIN GROUP (ORDER BY cp.base_gbp) AS p50_base_gbp, COUNT(*) AS n FROM job_hunter.comp_points cp JOIN job_hunter.companies c ON cp.company_id = c.id LEFT JOIN job_hunter.levels l ON cp.level_id = l.id WHERE cp.base_gbp IS NOT NULL AND cp.location_bucket IN 
(${location:sqlstring}) AND (c.slug = ANY(string_to_array(${company:sqlstring}, ',')) OR ${company:sqlstring} = 'all') GROUP BY c.display_name, l.slug ORDER BY c.display_name, l.rank NULLS LAST", - "refId": "A" - } - ], - "title": "Per-company salary by level (p50 base)", - "type": "table" - }, - { - "datasource": {"type": "grafana-postgresql-datasource", "uid": "job-hunter-pg"}, - "description": "p50 total comp (base + bonus + RSU/year + sign-on/year) per (company, level).", - "fieldConfig": { - "defaults": { - "color": {"mode": "continuous-GrYlRd"}, - "custom": {"align": "center", "cellOptions": {"type": "color-background"}}, - "unit": "currencyGBP" - }, - "overrides": [] - }, - "gridPos": {"h": 10, "w": 12, "x": 0, "y": 39}, - "id": 7, - "options": { - "cellHeight": "sm", - "footer": {"countRows": false, "fields": "", "reducer": ["sum"], "show": false}, - "showHeader": true - }, - "targets": [ - { - "datasource": {"type": "grafana-postgresql-datasource", "uid": "job-hunter-pg"}, - "format": "table", - "rawQuery": true, - "rawSql": "SELECT c.display_name AS company, l.slug AS level, percentile_cont(0.5) WITHIN GROUP (ORDER BY COALESCE(cp.base_gbp, 0) + COALESCE(cp.bonus_gbp, 0) + COALESCE(cp.rsu_annual_gbp, 0) + COALESCE(cp.signon_gbp, 0)) AS p50_total_gbp FROM job_hunter.comp_points cp JOIN job_hunter.companies c ON cp.company_id = c.id LEFT JOIN job_hunter.levels l ON cp.level_id = l.id WHERE cp.base_gbp IS NOT NULL AND cp.location_bucket IN (${location:sqlstring}) GROUP BY c.display_name, l.slug ORDER BY c.display_name", - "refId": "A" - } - ], - "title": "Total comp heatmap (p50, GBP)", - "type": "table" - }, - { - "datasource": {"type": "grafana-postgresql-datasource", "uid": "job-hunter-pg"}, - "description": "Comp-datapoint ingestion volume by source.", - "fieldConfig": { - "defaults": { - "color": {"mode": "palette-classic"}, - "custom": { - "drawStyle": "bars", - "fillOpacity": 60, - "lineWidth": 1, - "stacking": {"mode": "normal"} - } - }, - 
"overrides": [] - }, - "gridPos": {"h": 10, "w": 12, "x": 12, "y": 39}, - "id": 8, - "options": { - "legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}, - "tooltip": {"mode": "single", "sort": "none"} - }, - "targets": [ - { - "datasource": {"type": "grafana-postgresql-datasource", "uid": "job-hunter-pg"}, - "format": "time_series", - "rawQuery": true, - "rawSql": "SELECT date_trunc('day', fetched_at) AT TIME ZONE 'UTC' AS time, source, COUNT(*) AS value FROM job_hunter.comp_points WHERE $__timeFilter(fetched_at) GROUP BY 1, 2 ORDER BY 1", - "refId": "A" - } - ], - "title": "Comp-point volume by source", - "type": "timeseries" - }, - { - "datasource": {"type": "grafana-postgresql-datasource", "uid": "job-hunter-pg"}, - "description": "p50 base salary trend by (company, level) for top 5 companies.", - "fieldConfig": { - "defaults": { - "color": {"mode": "palette-classic"}, - "custom": { - "drawStyle": "line", - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 6, - "showPoints": "auto" - }, - "unit": "currencyGBP" - }, - "overrides": [] - }, - "gridPos": {"h": 10, "w": 24, "x": 0, "y": 49}, - "id": 9, - "options": { - "legend": {"displayMode": "table", "placement": "right", "showLegend": true}, - "tooltip": {"mode": "multi", "sort": "desc"} - }, - "targets": [ - { - "datasource": {"type": "grafana-postgresql-datasource", "uid": "job-hunter-pg"}, - "format": "time_series", - "rawQuery": true, - "rawSql": "WITH ranked AS (SELECT c.slug AS company_slug, COUNT(*) AS n FROM job_hunter.comp_points cp JOIN job_hunter.companies c ON cp.company_id = c.id WHERE cp.base_gbp IS NOT NULL AND cp.location_bucket IN (${location:sqlstring}) GROUP BY c.slug ORDER BY n DESC LIMIT 5) SELECT date_trunc('month', cp.effective_date)::timestamp AS time, c.display_name || ' / ' || COALESCE(l.slug, 'unknown') AS metric, percentile_cont(0.5) WITHIN GROUP (ORDER BY cp.base_gbp) AS value FROM job_hunter.comp_points cp JOIN job_hunter.companies c ON 
cp.company_id = c.id LEFT JOIN job_hunter.levels l ON cp.level_id = l.id WHERE cp.base_gbp IS NOT NULL AND cp.effective_date IS NOT NULL AND cp.location_bucket IN (${location:sqlstring}) AND c.slug IN (SELECT company_slug FROM ranked) AND (l.slug = ${level:sqlstring} OR ${level:sqlstring} = 'all') GROUP BY 1, 2 ORDER BY 1", - "refId": "A" - } - ], - "title": "Base-salary trend (p50) — top 5 companies", - "type": "timeseries" } ], "refresh": "", "schemaVersion": 39, "tags": ["job-hunter", "jobs", "careers"], - "templating": {"list": [ - { - "current": {"selected": true, "text": ["london"], "value": ["london"]}, - "datasource": {"type": "grafana-postgresql-datasource", "uid": "job-hunter-pg"}, - "definition": "SELECT DISTINCT location_bucket FROM job_hunter.comp_points ORDER BY 1", - "includeAll": false, - "label": "Location", - "multi": true, - "name": "location", - "options": [], - "query": "SELECT DISTINCT location_bucket FROM job_hunter.comp_points ORDER BY 1", - "refresh": 1, - "regex": "", - "type": "query" - }, - { - "current": {"selected": true, "text": "senior", "value": "senior"}, - "datasource": {"type": "grafana-postgresql-datasource", "uid": "job-hunter-pg"}, - "definition": "SELECT slug FROM job_hunter.levels WHERE company_id IS NULL ORDER BY rank", - "includeAll": true, - "allValue": "all", - "label": "Level", - "multi": false, - "name": "level", - "options": [], - "query": "SELECT slug FROM job_hunter.levels WHERE company_id IS NULL ORDER BY rank", - "refresh": 1, - "regex": "", - "type": "query" - }, - { - "current": {"selected": true, "text": "all", "value": "all"}, - "datasource": {"type": "grafana-postgresql-datasource", "uid": "job-hunter-pg"}, - "definition": "SELECT slug FROM job_hunter.companies ORDER BY slug", - "includeAll": true, - "allValue": "all", - "label": "Company", - "multi": true, - "name": "company", - "options": [], - "query": "SELECT slug FROM job_hunter.companies ORDER BY slug", - "refresh": 1, - "regex": "", - "type": "query" - 
} - ]}, + "templating": {"list": []}, "time": {"from": "now-30d", "to": "now"}, "timepicker": {}, "timezone": "browser", diff --git a/stacks/monitoring/modules/monitoring/dashboards/uk-payslip.json b/stacks/monitoring/modules/monitoring/dashboards/uk-payslip.json index 226a3c43..9b0c2644 100644 --- a/stacks/monitoring/modules/monitoring/dashboards/uk-payslip.json +++ b/stacks/monitoring/modules/monitoring/dashboards/uk-payslip.json @@ -179,7 +179,7 @@ { "id": 7, "title": "YTD uses \u2014 where gross went", - "description": "Year-to-date cumulative breakdown of where the gross went. Stacked \u2014 top equals gross_pay minus student loan and RSU offset (both small; shown on Panel 8 Sankey). RSU vest tax broken out at the exact band-aware marginal (PA-taper aware: 60% in the \u00a3100k\u2013\u00a3125,140 zone, 47% additional-rate, etc.) \u2014 see SQL for full bands. Green = take-home; red = cash income tax; orange = tax on RSU vest; orange = cash NI; purple = pension.", + "description": "Year-to-date cumulative breakdown of where the gross went. Stacked \u2014 top equals gross_pay minus student loan and RSU offset (both small; shown on Panel 8 Sankey). 
Green = take-home; red = cash income tax; orange = RSU-attributed income tax + NI; purple = pension.", "type": "timeseries", "datasource": { "type": "grafana-postgresql-datasource", @@ -258,14 +258,14 @@ }, { "id": "displayName", - "value": "Income Tax (cash)" + "value": "Income Tax (cash pay)" } ] }, { "matcher": { "id": "byName", - "options": "ytd_rsu_tax_marginal" + "options": "ytd_rsu_income_tax" }, "properties": [ { @@ -277,14 +277,14 @@ }, { "id": "displayName", - "value": "Tax on RSU vest (band-aware marginal)" + "value": "Income Tax (RSU-attributed)" } ] }, { "matcher": { "id": "byName", - "options": "ytd_cash_ni" + "options": "ytd_ni" }, "properties": [ { @@ -296,7 +296,7 @@ }, { "id": "displayName", - "value": "National Insurance (cash)" + "value": "National Insurance" } ] }, @@ -341,7 +341,7 @@ "type": "grafana-postgresql-datasource", "uid": "payslips-pg" }, - "rawSql": "WITH r AS (SELECT * FROM payslip_ingest.payslip WHERE $__timeFilter(pay_date)), ani AS (SELECT *, COALESCE(SUM(gross_pay - COALESCE(pension_sacrifice, 0)) OVER (PARTITION BY tax_year ORDER BY pay_date ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0) AS ani_prior FROM r), slice AS (SELECT *, ani_prior + gross_pay - COALESCE(rsu_vest, 0) - COALESCE(pension_sacrifice, 0) AS ani_pre, ani_prior + gross_pay - COALESCE(pension_sacrifice, 0) AS ani_post FROM ani), m AS (SELECT *, GREATEST(0, LEAST(ani_post, 12570) - GREATEST(ani_pre, 0)) * 0.00 + GREATEST(0, LEAST(ani_post, 50270) - GREATEST(ani_pre, 12570)) * 0.20 + GREATEST(0, LEAST(ani_post, 100000) - GREATEST(ani_pre, 50270)) * 0.40 + GREATEST(0, LEAST(ani_post, 125140) - GREATEST(ani_pre, 100000)) * 0.60 + GREATEST(0, ani_post - GREATEST(ani_pre, 125140)) * 0.45 AS rsu_paye_marginal, GREATEST(0, LEAST(ani_post, 12570) - GREATEST(ani_pre, 0)) * 0.00 + GREATEST(0, LEAST(ani_post, 50270) - GREATEST(ani_pre, 12570)) * 0.08 + GREATEST(0, ani_post - GREATEST(ani_pre, 50270)) * 0.02 AS rsu_ni_marginal FROM slice) SELECT pay_date AS \"time\", 
SUM(net_pay) OVER w AS ytd_net, SUM(GREATEST(0, income_tax - rsu_paye_marginal)) OVER w AS ytd_cash_income_tax, SUM(rsu_paye_marginal + rsu_ni_marginal) OVER w AS ytd_rsu_tax_marginal, SUM(GREATEST(0, national_insurance - rsu_ni_marginal)) OVER w AS ytd_cash_ni, SUM(pension_employee) OVER w AS ytd_pension_employee FROM m WINDOW w AS (PARTITION BY tax_year ORDER BY pay_date) ORDER BY pay_date", + "rawSql": "SELECT pay_date AS \"time\", SUM(net_pay) OVER w AS ytd_net, SUM(COALESCE(cash_income_tax, income_tax)) OVER w AS ytd_cash_income_tax, SUM(income_tax - COALESCE(cash_income_tax, income_tax)) OVER w AS ytd_rsu_income_tax, SUM(national_insurance) OVER w AS ytd_ni, SUM(pension_employee) OVER w AS ytd_pension_employee FROM payslip_ingest.payslip WHERE $__timeFilter(pay_date) WINDOW w AS (PARTITION BY tax_year ORDER BY pay_date) ORDER BY pay_date", "format": "time_series", "refId": "A", "rawQuery": true, @@ -352,7 +352,6 @@ { "id": 2, "title": "Monthly cash flow (RSU stripped)", - "description": "Cash-only view: gross pay minus the RSU vest (cash_gross) and the bank-deposited net_pay. 
Tax and NI are not shown here because UK cumulative PAYE genuinely takes a YTD true-up chunk in vest months on top of the marginal RSU PAYE \u2014 see Panel 11 for the full tax breakdown with the band-aware RSU split.", "type": "timeseries", "datasource": { "type": "grafana-postgresql-datasource", @@ -423,7 +422,7 @@ "type": "grafana-postgresql-datasource", "uid": "payslips-pg" }, - "rawSql": "SELECT pay_date AS \"time\", (gross_pay - rsu_vest) AS cash_gross, net_pay FROM payslip_ingest.payslip WHERE $__timeFilter(pay_date) ORDER BY pay_date", + "rawSql": "SELECT pay_date AS \"time\", (gross_pay - rsu_vest) AS cash_gross, net_pay, COALESCE(cash_income_tax, income_tax) AS income_tax, national_insurance FROM payslip_ingest.payslip WHERE $__timeFilter(pay_date) ORDER BY pay_date", "format": "time_series", "refId": "A", "rawQuery": true, @@ -434,7 +433,7 @@ { "id": 3, "title": "Effective rate & take-home % (YTD cumulative)", - "description": "YTD-cumulative rates \u2014 three angles on take-home. (1) PAYE rate = SUM(income_tax) / SUM(taxable_pay): the audit number HMRC uses, converges to ~marginal in the additional-rate band. (2) Cash take-home % = SUM(net_pay) / SUM(gross_pay - rsu_vest): what fraction of cash earnings becomes a bank deposit; useful for cash-flow planning. (3) Total keep % = (SUM(net_pay) + SUM(rsu_vest - rsu_paye_marginal - rsu_ni_marginal)) / SUM(gross_pay): true 'what I actually keep' including post-tax RSU shares with the exact band-aware marginal (PA-taper aware). Resets on 6-April tax year boundary.", + "description": "YTD-cumulative rates. PAYE rate uses reported taxable_pay as the base; all-deductions rate uses gross_pay. 
Computed from cumulative SUM over the tax year, so vest-month RSU tax is blended proportionally with RSU value \u2014 no per-slip attribution hack, no spikes.", "type": "timeseries", "datasource": { "type": "grafana-postgresql-datasource", @@ -485,65 +484,7 @@ } } }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "ytd_paye_rate_pct" - }, - "properties": [ - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "#C4162A" - } - }, - { - "id": "displayName", - "value": "PAYE rate (HMRC, on taxable_pay)" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "ytd_cash_take_home_pct" - }, - "properties": [ - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "green" - } - }, - { - "id": "displayName", - "value": "Cash take-home % (net / cash_gross)" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "ytd_total_keep_pct" - }, - "properties": [ - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "blue" - } - }, - { - "id": "displayName", - "value": "Total keep % (cash + post-tax shares)" - } - ] - } - ] + "overrides": [] }, "options": { "legend": { @@ -565,7 +506,7 @@ "type": "grafana-postgresql-datasource", "uid": "payslips-pg" }, - "rawSql": "WITH r AS (SELECT * FROM payslip_ingest.payslip WHERE $__timeFilter(pay_date)), ani AS (SELECT *, COALESCE(SUM(gross_pay - COALESCE(pension_sacrifice, 0)) OVER (PARTITION BY tax_year ORDER BY pay_date ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0) AS ani_prior FROM r), slice AS (SELECT *, ani_prior + gross_pay - COALESCE(rsu_vest, 0) - COALESCE(pension_sacrifice, 0) AS ani_pre, ani_prior + gross_pay - COALESCE(pension_sacrifice, 0) AS ani_post FROM ani), m AS (SELECT *, GREATEST(0, LEAST(ani_post, 12570) - GREATEST(ani_pre, 0)) * 0.00 + GREATEST(0, LEAST(ani_post, 50270) - GREATEST(ani_pre, 12570)) * 0.20 + GREATEST(0, LEAST(ani_post, 100000) - GREATEST(ani_pre, 50270)) * 0.40 + GREATEST(0, LEAST(ani_post, 125140) - GREATEST(ani_pre, 
100000)) * 0.60 + GREATEST(0, ani_post - GREATEST(ani_pre, 125140)) * 0.45 AS rsu_paye_marginal, GREATEST(0, LEAST(ani_post, 12570) - GREATEST(ani_pre, 0)) * 0.00 + GREATEST(0, LEAST(ani_post, 50270) - GREATEST(ani_pre, 12570)) * 0.08 + GREATEST(0, ani_post - GREATEST(ani_pre, 50270)) * 0.02 AS rsu_ni_marginal FROM slice) SELECT pay_date AS \"time\", ROUND(((SUM(income_tax) OVER w)::numeric / NULLIF(SUM(COALESCE(taxable_pay, gross_pay)) OVER w, 0)) * 100, 2) AS \"ytd_paye_rate_pct\", ROUND(((SUM(net_pay) OVER w)::numeric / NULLIF(SUM(gross_pay - rsu_vest) OVER w, 0)) * 100, 2) AS \"ytd_cash_take_home_pct\", ROUND((((SUM(net_pay) OVER w) + (SUM(rsu_vest - rsu_paye_marginal - rsu_ni_marginal) OVER w))::numeric / NULLIF(SUM(gross_pay) OVER w, 0)) * 100, 2) AS \"ytd_total_keep_pct\" FROM m WINDOW w AS (PARTITION BY tax_year ORDER BY pay_date) ORDER BY pay_date", + "rawSql": "SELECT pay_date AS \"time\", ROUND(((SUM(income_tax) OVER w)::numeric / NULLIF(SUM(COALESCE(taxable_pay, gross_pay)) OVER w, 0)) * 100, 2) AS \"ytd_paye_rate_pct\", ROUND((((SUM(income_tax) OVER w) + (SUM(national_insurance) OVER w) + (SUM(student_loan) OVER w))::numeric / NULLIF(SUM(gross_pay) OVER w, 0)) * 100, 2) AS \"ytd_all_deductions_pct\", ROUND(((SUM(net_pay) OVER w)::numeric / NULLIF(SUM(gross_pay) OVER w, 0)) * 100, 2) AS \"ytd_take_home_pct\" FROM payslip_ingest.payslip WHERE $__timeFilter(pay_date) WINDOW w AS (PARTITION BY tax_year ORDER BY pay_date) ORDER BY pay_date", "format": "time_series", "refId": "A", "rawQuery": true, @@ -576,7 +517,7 @@ { "id": 11, "title": "Tax & pension \u2014 monthly", - "description": "Per-month RSU vest tax + recurring deductions. Cash-side PAYE/NI hidden because UK cumulative PAYE makes them inherently bumpy in vest months despite the marginal RSU strip \u2014 see Panel 12 (YTD cumulative) for the smoothed totals or Panel 3 for the effective rate. 
Orange = tax on RSU vest at the exact band-aware marginal (PA-taper aware: 60% in \u00a3100k\u2013\u00a3125,140 zone); brown = student loan; purple = employee pension; light purple = employer pension (paid on top of salary).", + "description": "Per-month deductions and pension contributions. Stacked \u2014 top equals total tax + pension (both sides). Red = cash income tax; orange = RSU-attributed income tax; amber = NI; brown = student loan; purple = employee pension; light purple = employer pension (paid on top of salary).", "type": "timeseries", "datasource": { "type": "grafana-postgresql-datasource", @@ -624,7 +565,26 @@ { "matcher": { "id": "byName", - "options": "rsu_tax_marginal" + "options": "cash_income_tax" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "fixed", + "fixedColor": "#C4162A" + } + }, + { + "id": "displayName", + "value": "Income Tax (cash pay)" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "rsu_income_tax" }, "properties": [ { @@ -636,7 +596,26 @@ }, { "id": "displayName", - "value": "Tax on RSU vest (band-aware marginal)" + "value": "Income Tax (RSU-attributed)" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "ni" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "fixed", + "fixedColor": "orange" + } + }, + { + "id": "displayName", + "value": "National Insurance" } ] }, @@ -719,7 +698,7 @@ "type": "grafana-postgresql-datasource", "uid": "payslips-pg" }, - "rawSql": "WITH r AS (SELECT * FROM payslip_ingest.payslip WHERE $__timeFilter(pay_date)), ani AS (SELECT *, COALESCE(SUM(gross_pay - COALESCE(pension_sacrifice, 0)) OVER (PARTITION BY tax_year ORDER BY pay_date ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0) AS ani_prior FROM r), slice AS (SELECT *, ani_prior + gross_pay - COALESCE(rsu_vest, 0) - COALESCE(pension_sacrifice, 0) AS ani_pre, ani_prior + gross_pay - COALESCE(pension_sacrifice, 0) AS ani_post FROM ani), m AS (SELECT *, GREATEST(0, LEAST(ani_post, 12570) - 
GREATEST(ani_pre, 0)) * 0.00 + GREATEST(0, LEAST(ani_post, 50270) - GREATEST(ani_pre, 12570)) * 0.20 + GREATEST(0, LEAST(ani_post, 100000) - GREATEST(ani_pre, 50270)) * 0.40 + GREATEST(0, LEAST(ani_post, 125140) - GREATEST(ani_pre, 100000)) * 0.60 + GREATEST(0, ani_post - GREATEST(ani_pre, 125140)) * 0.45 AS rsu_paye_marginal, GREATEST(0, LEAST(ani_post, 12570) - GREATEST(ani_pre, 0)) * 0.00 + GREATEST(0, LEAST(ani_post, 50270) - GREATEST(ani_pre, 12570)) * 0.08 + GREATEST(0, ani_post - GREATEST(ani_pre, 50270)) * 0.02 AS rsu_ni_marginal FROM slice) SELECT pay_date AS \"time\", (rsu_paye_marginal + rsu_ni_marginal) AS rsu_tax_marginal, student_loan, pension_employee, pension_employer FROM m ORDER BY pay_date", + "rawSql": "SELECT pay_date AS \"time\", COALESCE(cash_income_tax, income_tax) AS cash_income_tax, income_tax - COALESCE(cash_income_tax, income_tax) AS rsu_income_tax, national_insurance AS ni, student_loan, pension_employee, pension_employer FROM payslip_ingest.payslip WHERE $__timeFilter(pay_date) ORDER BY pay_date", "format": "time_series", "refId": "A", "rawQuery": true, @@ -730,7 +709,7 @@ { "id": 12, "title": "Tax & pension \u2014 YTD cumulative", - "description": "Year-to-date cumulative tax and pension. Same series and colors as the monthly panel \u2014 RSU vest tax broken out at the exact band-aware marginal (PA-taper aware: 60% in \u00a3100k\u2013\u00a3125,140 zone, 47% additional-rate, etc.). Resets on 6-April tax year boundary.", + "description": "Year-to-date cumulative tax and pension. 
Same series and colors as the monthly panel; resets on 6-April tax year boundary.", "type": "timeseries", "datasource": { "type": "grafana-postgresql-datasource", @@ -790,14 +769,14 @@ }, { "id": "displayName", - "value": "Income Tax (cash)" + "value": "Income Tax (cash pay)" } ] }, { "matcher": { "id": "byName", - "options": "ytd_rsu_tax_marginal" + "options": "ytd_rsu_income_tax" }, "properties": [ { @@ -809,14 +788,14 @@ }, { "id": "displayName", - "value": "Tax on RSU vest (band-aware marginal)" + "value": "Income Tax (RSU-attributed)" } ] }, { "matcher": { "id": "byName", - "options": "ytd_cash_ni" + "options": "ytd_ni" }, "properties": [ { @@ -828,7 +807,7 @@ }, { "id": "displayName", - "value": "National Insurance (cash)" + "value": "National Insurance" } ] }, @@ -911,7 +890,7 @@ "type": "grafana-postgresql-datasource", "uid": "payslips-pg" }, - "rawSql": "WITH r AS (SELECT * FROM payslip_ingest.payslip WHERE $__timeFilter(pay_date)), ani AS (SELECT *, COALESCE(SUM(gross_pay - COALESCE(pension_sacrifice, 0)) OVER (PARTITION BY tax_year ORDER BY pay_date ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0) AS ani_prior FROM r), slice AS (SELECT *, ani_prior + gross_pay - COALESCE(rsu_vest, 0) - COALESCE(pension_sacrifice, 0) AS ani_pre, ani_prior + gross_pay - COALESCE(pension_sacrifice, 0) AS ani_post FROM ani), m AS (SELECT *, GREATEST(0, LEAST(ani_post, 12570) - GREATEST(ani_pre, 0)) * 0.00 + GREATEST(0, LEAST(ani_post, 50270) - GREATEST(ani_pre, 12570)) * 0.20 + GREATEST(0, LEAST(ani_post, 100000) - GREATEST(ani_pre, 50270)) * 0.40 + GREATEST(0, LEAST(ani_post, 125140) - GREATEST(ani_pre, 100000)) * 0.60 + GREATEST(0, ani_post - GREATEST(ani_pre, 125140)) * 0.45 AS rsu_paye_marginal, GREATEST(0, LEAST(ani_post, 12570) - GREATEST(ani_pre, 0)) * 0.00 + GREATEST(0, LEAST(ani_post, 50270) - GREATEST(ani_pre, 12570)) * 0.08 + GREATEST(0, ani_post - GREATEST(ani_pre, 50270)) * 0.02 AS rsu_ni_marginal FROM slice) SELECT pay_date AS \"time\", SUM(GREATEST(0, 
income_tax - rsu_paye_marginal)) OVER w AS ytd_cash_income_tax, SUM(rsu_paye_marginal + rsu_ni_marginal) OVER w AS ytd_rsu_tax_marginal, SUM(GREATEST(0, national_insurance - rsu_ni_marginal)) OVER w AS ytd_cash_ni, SUM(student_loan) OVER w AS ytd_student_loan, SUM(pension_employee) OVER w AS ytd_pension_employee, SUM(pension_employer) OVER w AS ytd_pension_employer FROM m WINDOW w AS (PARTITION BY tax_year ORDER BY pay_date) ORDER BY pay_date", + "rawSql": "SELECT pay_date AS \"time\", SUM(COALESCE(cash_income_tax, income_tax)) OVER w AS ytd_cash_income_tax, SUM(income_tax - COALESCE(cash_income_tax, income_tax)) OVER w AS ytd_rsu_income_tax, SUM(national_insurance) OVER w AS ytd_ni, SUM(student_loan) OVER w AS ytd_student_loan, SUM(pension_employee) OVER w AS ytd_pension_employee, SUM(pension_employer) OVER w AS ytd_pension_employer FROM payslip_ingest.payslip WHERE $__timeFilter(pay_date) WINDOW w AS (PARTITION BY tax_year ORDER BY pay_date) ORDER BY pay_date", "format": "time_series", "refId": "A", "rawQuery": true, @@ -932,7 +911,7 @@ "h": 9, "w": 24, "x": 0, - "y": 39 + "y": 29 }, "fieldConfig": { "defaults": { @@ -942,20 +921,20 @@ "unit": "currencyGBP", "custom": { "axisPlacement": "auto", - "drawStyle": "bars", - "fillOpacity": 100, + "drawStyle": "line", + "fillOpacity": 70, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, - "lineWidth": 0, + "lineWidth": 1, "pointSize": 4, "scaleDistribution": { "type": "linear" }, - "showPoints": "never", + "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", @@ -1049,8 +1028,7 @@ "legend": { "calcs": [ "last", - "max", - "sum" + "max" ], "displayMode": "table", "placement": "bottom" @@ -1086,7 +1064,7 @@ "h": 6, "w": 24, "x": 0, - "y": 62 + "y": 38 }, "fieldConfig": { "defaults": { @@ -1260,7 +1238,7 @@ "h": 14, "w": 24, "x": 0, - "y": 68 + "y": 44 }, "fieldConfig": { "defaults": { @@ -1436,7 +1414,7 @@ "h": 12, "w": 24, "x": 0, - "y": 82 + "y": 58 }, 
"fieldConfig": { "defaults": { @@ -1746,7 +1724,7 @@ "h": 14, "w": 24, "x": 0, - "y": 48 + "y": 70 }, "options": { "monochrome": false, @@ -1767,7 +1745,7 @@ "rawQuery": true, "editorMode": "code", "format": "table", - "rawSql": "WITH r AS (SELECT * FROM payslip_ingest.payslip WHERE $__timeFilter(pay_date)), ani AS (SELECT *, COALESCE(SUM(gross_pay - COALESCE(pension_sacrifice, 0)) OVER (PARTITION BY tax_year ORDER BY pay_date ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0) AS ani_prior FROM r), slice AS (SELECT *, ani_prior + gross_pay - COALESCE(rsu_vest, 0) - COALESCE(pension_sacrifice, 0) AS ani_pre, ani_prior + gross_pay - COALESCE(pension_sacrifice, 0) AS ani_post FROM ani), m AS (SELECT *, GREATEST(0, LEAST(ani_post, 12570) - GREATEST(ani_pre, 0)) * 0.00 + GREATEST(0, LEAST(ani_post, 50270) - GREATEST(ani_pre, 12570)) * 0.20 + GREATEST(0, LEAST(ani_post, 100000) - GREATEST(ani_pre, 50270)) * 0.40 + GREATEST(0, LEAST(ani_post, 125140) - GREATEST(ani_pre, 100000)) * 0.60 + GREATEST(0, ani_post - GREATEST(ani_pre, 125140)) * 0.45 AS rsu_paye_marginal, GREATEST(0, LEAST(ani_post, 12570) - GREATEST(ani_pre, 0)) * 0.00 + GREATEST(0, LEAST(ani_post, 50270) - GREATEST(ani_pre, 12570)) * 0.08 + GREATEST(0, ani_post - GREATEST(ani_pre, 50270)) * 0.02 AS rsu_ni_marginal FROM slice), agg AS (SELECT COALESCE(SUM(salary), 0) AS salary, COALESCE(SUM(bonus), 0) AS bonus, COALESCE(SUM(rsu_vest), 0) AS rsu_vest, COALESCE(SUM(GREATEST(gross_pay - salary - bonus - rsu_vest, 0)), 0) AS other_income, COALESCE(SUM(net_pay), 0) AS net_pay, COALESCE(SUM(GREATEST(0, income_tax - rsu_paye_marginal)), 0) AS cash_income_tax, COALESCE(SUM(rsu_paye_marginal + rsu_ni_marginal), 0) AS rsu_tax_marginal, COALESCE(SUM(GREATEST(0, national_insurance - rsu_ni_marginal)), 0) AS cash_ni, COALESCE(SUM(pension_employee), 0) AS pension, COALESCE(SUM(student_loan), 0) AS student_loan, COALESCE(SUM(rsu_offset), 0) AS rsu_offset FROM m) SELECT 'Salary' AS source, 'Gross' AS target, salary AS value 
FROM agg WHERE salary > 0 UNION ALL SELECT 'Bonus', 'Gross', bonus FROM agg WHERE bonus > 0 UNION ALL SELECT 'RSU', 'Gross', rsu_vest FROM agg WHERE rsu_vest > 0 UNION ALL SELECT 'Other income', 'Gross', other_income FROM agg WHERE other_income > 0 UNION ALL SELECT 'Gross', 'Net pay', net_pay FROM agg WHERE net_pay > 0 UNION ALL SELECT 'Gross', 'Income Tax (cash)', cash_income_tax FROM agg WHERE cash_income_tax > 0 UNION ALL SELECT 'Gross', 'Tax on RSU vest', rsu_tax_marginal FROM agg WHERE rsu_tax_marginal > 0 UNION ALL SELECT 'Gross', 'National Insurance (cash)', cash_ni FROM agg WHERE cash_ni > 0 UNION ALL SELECT 'Gross', 'Pension', pension FROM agg WHERE pension > 0 UNION ALL SELECT 'Gross', 'Student Loan', student_loan FROM agg WHERE student_loan > 0 UNION ALL SELECT 'Gross', 'RSU Offset', rsu_offset FROM agg WHERE rsu_offset > 0" + "rawSql": "WITH agg AS (SELECT COALESCE(SUM(salary), 0) AS salary, COALESCE(SUM(bonus), 0) AS bonus, COALESCE(SUM(rsu_vest), 0) AS rsu_vest, COALESCE(SUM(GREATEST(gross_pay - salary - bonus - rsu_vest, 0)), 0) AS other_income, COALESCE(SUM(net_pay), 0) AS net_pay, COALESCE(SUM(COALESCE(cash_income_tax, income_tax)), 0) AS cash_income_tax, COALESCE(SUM(income_tax - COALESCE(cash_income_tax, income_tax)), 0) AS rsu_income_tax, COALESCE(SUM(national_insurance), 0) AS ni, COALESCE(SUM(pension_employee), 0) AS pension, COALESCE(SUM(student_loan), 0) AS student_loan, COALESCE(SUM(rsu_offset), 0) AS rsu_offset FROM payslip_ingest.payslip WHERE $__timeFilter(pay_date)) SELECT 'Salary' AS source, 'Gross' AS target, salary AS value FROM agg WHERE salary > 0 UNION ALL SELECT 'Bonus', 'Gross', bonus FROM agg WHERE bonus > 0 UNION ALL SELECT 'RSU', 'Gross', rsu_vest FROM agg WHERE rsu_vest > 0 UNION ALL SELECT 'Other income', 'Gross', other_income FROM agg WHERE other_income > 0 UNION ALL SELECT 'Gross', 'Net pay', net_pay FROM agg WHERE net_pay > 0 UNION ALL SELECT 'Gross', 'Income Tax (cash)', cash_income_tax FROM agg WHERE cash_income_tax > 
0 UNION ALL SELECT 'Gross', 'Income Tax (RSU)', rsu_income_tax FROM agg WHERE rsu_income_tax > 0 UNION ALL SELECT 'Gross', 'National Insurance', ni FROM agg WHERE ni > 0 UNION ALL SELECT 'Gross', 'Pension', pension FROM agg WHERE pension > 0 UNION ALL SELECT 'Gross', 'Student Loan', student_loan FROM agg WHERE student_loan > 0 UNION ALL SELECT 'Gross', 'RSU Offset', rsu_offset FROM agg WHERE rsu_offset > 0" } ] }, @@ -1784,7 +1762,7 @@ "h": 10, "w": 24, "x": 0, - "y": 94 + "y": 84 }, "fieldConfig": { "defaults": { @@ -1930,142 +1908,116 @@ ] }, { - "id": 16, - "title": "Yearly receipt \u2014 gross income per tax year", - "description": "One stacked bar per tax year showing all gross income components: salary (cash, post-pension-sacrifice), pension (salary-sacrifice \u2014 untaxed but real income), bonus, and RSU vest gross. Bar total = pre-sacrifice gross compensation. Aligns with P60: bar \u2212 pension_sacrifice \u2248 ytd_gross reported on the final March payslip / P60. Where the parser correctly captured bonus into gross_pay (every year except 2023/24 and 2024/25 \u2014 March payslip parsing bug), the match is exact.", - "type": "barchart", + "id": 10, + "title": "HMRC Tax Year Reconciliation \u2014 Individual Tax API", + "description": "Latest snapshot from HMRC Individual Tax API v1.1 vs SUM(payslip.income_tax) per tax year. Delta > \u00a310 turns red \u2014 that's parser drift vs HMRC's held figures, the authoritative ground truth. 
Shown only for years where hmrc-sync has pulled a snapshot.", + "type": "table", "datasource": { "type": "grafana-postgresql-datasource", "uid": "payslips-pg" }, "gridPos": { "h": 10, - "w": 12, + "w": 24, "x": 0, - "y": 29 + "y": 94 }, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "unit": "currencyGBP", "custom": { - "axisPlacement": "auto", - "axisLabel": "", - "axisCenteredZero": false, - "fillOpacity": 80, - "gradientMode": "none", - "lineWidth": 1, - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "thresholdsStyle": { - "mode": "off" - } + "align": "right", + "displayMode": "auto" } }, "overrides": [ { "matcher": { - "id": "byName", - "options": "salary_cash" + "id": "byRegexp", + "options": "^delta_" }, "properties": [ { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "green" - } + "id": "custom.displayMode", + "value": "color-background" }, { - "id": "displayName", - "value": "Salary (cash, post-sacrifice)" + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "green", + "value": -10 + }, + { + "color": "red", + "value": 10 + }, + { + "color": "red", + "value": -10 + } + ] + } } ] }, { "matcher": { "id": "byName", - "options": "pension_sacrifice" + "options": "tax_year" }, "properties": [ { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "#CE96D8" - } + "id": "unit", + "value": "string" }, { - "id": "displayName", - "value": "Pension (salary sacrifice, untaxed)" + "id": "custom.align", + "value": "left" } ] }, { "matcher": { "id": "byName", - "options": "bonus" + "options": "employer_paye_ref" }, "properties": [ { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "#FADE2A" - } + "id": "unit", + "value": "string" }, { - "id": "displayName", - "value": "Bonus (gross)" + "id": "custom.align", + "value": "left" } ] }, { "matcher": { "id": "byName", - "options": "rsu_gross" + 
"options": "snapshot_date" }, "properties": [ { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "#3274D9" - } - }, - { - "id": "displayName", - "value": "RSU vest (gross)" + "id": "unit", + "value": "dateTimeAsIso" } ] } ] }, "options": { - "barRadius": 0, - "barWidth": 0.6, - "groupWidth": 0.7, - "orientation": "auto", - "showValue": "auto", - "stacking": "normal", - "xField": "tax_year", - "xTickLabelRotation": 0, - "xTickLabelSpacing": 0, - "legend": { - "calcs": [ - "sum" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "multi", - "sort": "desc" + "showHeader": true, + "cellHeight": "sm", + "footer": { + "show": false } }, "targets": [ @@ -2078,74 +2030,7 @@ "rawQuery": true, "editorMode": "code", "format": "table", - "rawSql": "SELECT tax_year, SUM(salary - COALESCE(pension_sacrifice, 0)) AS salary_cash, SUM(COALESCE(pension_sacrifice, 0)) AS pension_sacrifice, SUM(bonus) AS bonus, SUM(rsu_vest) AS rsu_gross FROM payslip_ingest.payslip GROUP BY tax_year ORDER BY tax_year" - } - ] - }, - { - "id": 17, - "title": "YTD gross salary \u2014 year-over-year comparison", - "description": "Cumulative gross pay built up month by month within each UK tax year (April \u2192 March). One line per tax year. Pay dates are projected onto a sliding 12-month window ending now, so years overlay cleanly without falling outside the dashboard's time range. 
X-axis shows month-of-tax-year (April first, March last).", - "type": "timeseries", - "timeFrom": "13M", - "datasource": { - "type": "grafana-postgresql-datasource", - "uid": "payslips-pg" - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 29 - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "unit": "currencyGBP", - "decimals": 0, - "custom": { - "drawStyle": "line", - "lineWidth": 2, - "fillOpacity": 0, - "pointSize": 5, - "showPoints": "auto", - "spanNulls": true, - "axisPlacement": "auto", - "stacking": { - "group": "A", - "mode": "none" - } - } - }, - "overrides": [] - }, - "options": { - "legend": { - "calcs": [ - "last", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "refId": "A", - "datasource": { - "type": "grafana-postgresql-datasource", - "uid": "payslips-pg" - }, - "rawQuery": true, - "editorMode": "code", - "format": "time_series", - "rawSql": "WITH projected AS (SELECT ((CURRENT_DATE - INTERVAL '12 months')::date + (pay_date - MAKE_DATE(SUBSTRING(tax_year, 1, 4)::int, 4, 6)))::timestamp AS t, tax_year, SUM(gross_pay) OVER (PARTITION BY tax_year ORDER BY pay_date) AS ytd FROM payslip_ingest.payslip) SELECT t AS \"time\", tax_year AS metric, ytd AS ytd_gross FROM projected ORDER BY t, tax_year" + "rawSql": "WITH latest AS (SELECT DISTINCT ON (tax_year, employer_paye_ref) tax_year, employer_paye_ref, snapshot_date, gross_pay, income_tax, ni_contributions FROM hmrc_sync.tax_year_snapshot ORDER BY tax_year, employer_paye_ref, snapshot_date DESC), summed AS (SELECT tax_year, COALESCE(SUM(gross_pay), 0) AS sum_gross, COALESCE(SUM(income_tax), 0) AS sum_tax, COALESCE(SUM(national_insurance), 0) AS sum_ni FROM payslip_ingest.payslip GROUP BY tax_year) SELECT l.tax_year, l.employer_paye_ref, l.snapshot_date, l.gross_pay AS hmrc_gross, s.sum_gross AS computed_gross, (l.gross_pay - s.sum_gross) AS delta_gross, 
l.income_tax AS hmrc_tax, s.sum_tax AS computed_tax, (l.income_tax - s.sum_tax) AS delta_tax, l.ni_contributions AS hmrc_ni, s.sum_ni AS computed_ni, (l.ni_contributions - s.sum_ni) AS delta_ni FROM latest l LEFT JOIN summed s ON s.tax_year = l.tax_year ORDER BY l.tax_year DESC" } ] } diff --git a/stacks/monitoring/modules/monitoring/dashboards/wealth.json b/stacks/monitoring/modules/monitoring/dashboards/wealth.json deleted file mode 100644 index 1b8a8aed..00000000 --- a/stacks/monitoring/modules/monitoring/dashboards/wealth.json +++ /dev/null @@ -1,671 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": {"type": "datasource", "uid": "grafana"}, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "description": "Wealth — net worth, contributions, and growth over time. Backed by the wealthfolio_sync PG mirror of Wealthfolio's SQLite, refreshed hourly by the pg-sync sidecar.", - "editable": true, - "fiscalYearStartMonth": 0, - "id": null, - "links": [], - "panels": [ - { - "id": 1, - "title": "Net worth (current)", - "type": "stat", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "gridPos": {"h": 4, "w": 5, "x": 0, "y": 0}, - "fieldConfig": { - "defaults": { - "unit": "currencyGBP", - "color": {"mode": "fixed", "fixedColor": "green"}, - "decimals": 0 - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "orientation": "auto", - "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, - "textMode": "auto" - }, - "targets": [ - { - "refId": "A", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "rawQuery": true, - "editorMode": "code", - "format": "table", - "rawSql": "SELECT SUM(total_value) AS net_worth FROM daily_account_valuation WHERE valuation_date = (SELECT MAX(valuation_date) FROM 
daily_account_valuation)" - } - ] - }, - { - "id": 2, - "title": "Net contribution (cumulative)", - "description": "Total deposits minus withdrawals across all accounts.", - "type": "stat", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "gridPos": {"h": 4, "w": 5, "x": 5, "y": 0}, - "fieldConfig": { - "defaults": { - "unit": "currencyGBP", - "color": {"mode": "fixed", "fixedColor": "blue"}, - "decimals": 0 - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "orientation": "auto", - "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, - "textMode": "auto" - }, - "targets": [ - { - "refId": "A", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "rawQuery": true, - "editorMode": "code", - "format": "table", - "rawSql": "SELECT SUM(net_contribution) AS contribution FROM daily_account_valuation WHERE valuation_date = (SELECT MAX(valuation_date) FROM daily_account_valuation)" - } - ] - }, - { - "id": 3, - "title": "Growth (unrealised)", - "description": "Net worth minus net contribution — the gain on everything you've put in.", - "type": "stat", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "gridPos": {"h": 4, "w": 5, "x": 10, "y": 0}, - "fieldConfig": { - "defaults": { - "unit": "currencyGBP", - "color": {"mode": "thresholds"}, - "decimals": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - {"color": "red", "value": null}, - {"color": "green", "value": 0} - ] - } - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "orientation": "auto", - "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, - "textMode": "auto" - }, - "targets": [ - { - "refId": "A", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "rawQuery": true, - "editorMode": "code", - "format": 
"table", - "rawSql": "SELECT (SUM(total_value) - SUM(net_contribution)) AS growth FROM daily_account_valuation WHERE valuation_date = (SELECT MAX(valuation_date) FROM daily_account_valuation)" - } - ] - }, - { - "id": 4, - "title": "ROI %", - "description": "Growth / net contribution × 100. Excludes accounts with zero/negative contribution (Schwab) to avoid distortion.", - "type": "stat", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "gridPos": {"h": 4, "w": 5, "x": 15, "y": 0}, - "fieldConfig": { - "defaults": { - "unit": "percent", - "color": {"mode": "thresholds"}, - "decimals": 1, - "thresholds": { - "mode": "absolute", - "steps": [ - {"color": "red", "value": null}, - {"color": "yellow", "value": 0}, - {"color": "green", "value": 5} - ] - } - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "orientation": "auto", - "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, - "textMode": "auto" - }, - "targets": [ - { - "refId": "A", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "rawQuery": true, - "editorMode": "code", - "format": "table", - "rawSql": "WITH latest AS (SELECT * FROM daily_account_valuation WHERE valuation_date = (SELECT MAX(valuation_date) FROM daily_account_valuation) AND net_contribution > 0) SELECT (SUM(total_value - net_contribution) / NULLIF(SUM(net_contribution), 0) * 100) AS roi_pct FROM latest" - } - ] - }, - { - "id": 5, - "title": "Net worth — total over time", - "description": "Daily total_value summed across all accounts (base GBP).", - "type": "timeseries", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "gridPos": {"h": 10, "w": 24, "x": 0, "y": 4}, - "fieldConfig": { - "defaults": { - "color": {"mode": "fixed", "fixedColor": "green"}, - "unit": "currencyGBP", - "custom": { - "drawStyle": "line", - "lineWidth": 2, - "fillOpacity": 20, - 
"pointSize": 4, - "showPoints": "never", - "spanNulls": true, - "axisPlacement": "auto", - "stacking": {"group": "A", "mode": "none"} - } - }, - "overrides": [ - { - "matcher": {"id": "byName", "options": "net_worth"}, - "properties": [{"id": "displayName", "value": "Net worth"}] - } - ] - }, - "options": { - "legend": {"calcs": ["last", "max"], "displayMode": "table", "placement": "bottom"}, - "tooltip": {"mode": "multi", "sort": "desc"} - }, - "targets": [ - { - "refId": "A", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "rawQuery": true, - "editorMode": "code", - "format": "time_series", - "rawSql": "SELECT valuation_date::timestamp AS \"time\", SUM(total_value) AS net_worth FROM daily_account_valuation WHERE $__timeFilter(valuation_date) GROUP BY valuation_date ORDER BY valuation_date" - } - ] - }, - { - "id": 6, - "title": "Net contribution vs market value", - "description": "Net contribution = cumulative deposits − withdrawals. Market value = total_value (cash + investments). 
Gap between the two = unrealised growth.", - "type": "timeseries", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "gridPos": {"h": 10, "w": 12, "x": 0, "y": 14}, - "fieldConfig": { - "defaults": { - "color": {"mode": "palette-classic"}, - "unit": "currencyGBP", - "custom": { - "drawStyle": "line", - "lineWidth": 2, - "fillOpacity": 0, - "pointSize": 4, - "showPoints": "never", - "spanNulls": true, - "axisPlacement": "auto", - "stacking": {"group": "A", "mode": "none"} - } - }, - "overrides": [ - { - "matcher": {"id": "byName", "options": "market_value"}, - "properties": [ - {"id": "color", "value": {"mode": "fixed", "fixedColor": "green"}}, - {"id": "displayName", "value": "Market value"} - ] - }, - { - "matcher": {"id": "byName", "options": "net_contribution"}, - "properties": [ - {"id": "color", "value": {"mode": "fixed", "fixedColor": "blue"}}, - {"id": "displayName", "value": "Net contribution"} - ] - } - ] - }, - "options": { - "legend": {"calcs": ["last"], "displayMode": "table", "placement": "bottom"}, - "tooltip": {"mode": "multi", "sort": "desc"} - }, - "targets": [ - { - "refId": "A", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "rawQuery": true, - "editorMode": "code", - "format": "time_series", - "rawSql": "SELECT valuation_date::timestamp AS \"time\", SUM(net_contribution) AS net_contribution, SUM(total_value) AS market_value FROM daily_account_valuation WHERE $__timeFilter(valuation_date) GROUP BY valuation_date ORDER BY valuation_date" - } - ] - }, - { - "id": 7, - "title": "Growth (market value − contribution) over time", - "description": "Unrealised gain across all accounts. 
Filled area to emphasise the wealth created above the contributed capital.", - "type": "timeseries", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "gridPos": {"h": 10, "w": 12, "x": 12, "y": 14}, - "fieldConfig": { - "defaults": { - "color": {"mode": "fixed", "fixedColor": "#56A64B"}, - "unit": "currencyGBP", - "custom": { - "drawStyle": "line", - "lineWidth": 2, - "fillOpacity": 50, - "gradientMode": "opacity", - "pointSize": 4, - "showPoints": "never", - "spanNulls": true, - "axisPlacement": "auto", - "stacking": {"group": "A", "mode": "none"} - } - }, - "overrides": [ - { - "matcher": {"id": "byName", "options": "growth"}, - "properties": [{"id": "displayName", "value": "Growth"}] - } - ] - }, - "options": { - "legend": {"calcs": ["last", "max"], "displayMode": "table", "placement": "bottom"}, - "tooltip": {"mode": "multi", "sort": "desc"} - }, - "targets": [ - { - "refId": "A", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "rawQuery": true, - "editorMode": "code", - "format": "time_series", - "rawSql": "SELECT valuation_date::timestamp AS \"time\", (SUM(total_value) - SUM(net_contribution)) AS growth FROM daily_account_valuation WHERE $__timeFilter(valuation_date) GROUP BY valuation_date ORDER BY valuation_date" - } - ] - }, - { - "id": 8, - "title": "Per-account stacked — total value", - "description": "Stacked area showing each account's contribution to total net worth over time. 
Useful for spotting which account drives the trajectory.", - "type": "timeseries", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "gridPos": {"h": 11, "w": 24, "x": 0, "y": 24}, - "fieldConfig": { - "defaults": { - "color": {"mode": "palette-classic"}, - "unit": "currencyGBP", - "custom": { - "drawStyle": "line", - "lineWidth": 1, - "fillOpacity": 70, - "pointSize": 3, - "showPoints": "never", - "spanNulls": true, - "axisPlacement": "auto", - "stacking": {"group": "A", "mode": "normal"} - } - }, - "overrides": [] - }, - "options": { - "legend": {"calcs": ["last"], "displayMode": "table", "placement": "bottom"}, - "tooltip": {"mode": "multi", "sort": "desc"} - }, - "targets": [ - { - "refId": "A", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "rawQuery": true, - "editorMode": "code", - "format": "time_series", - "rawSql": "SELECT d.valuation_date::timestamp AS \"time\", a.name AS metric, d.total_value AS value FROM daily_account_valuation d JOIN accounts a ON a.id = d.account_id WHERE $__timeFilter(d.valuation_date) ORDER BY d.valuation_date, a.name" - } - ] - }, - { - "id": 9, - "title": "Cash vs invested (stacked)", - "description": "Daily breakdown of uninvested broker cash vs market value of investments. 
WORKPLACE_PENSION accounts (Fidelity) are reclassified entirely as invested — Wealthfolio dumps pension wrappers into cash_balance because it doesn't track the underlying fund holdings, but they are not actually cash.", - "type": "timeseries", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "gridPos": {"h": 10, "w": 24, "x": 0, "y": 35}, - "fieldConfig": { - "defaults": { - "color": {"mode": "palette-classic"}, - "unit": "currencyGBP", - "custom": { - "drawStyle": "line", - "lineWidth": 1, - "fillOpacity": 70, - "pointSize": 3, - "showPoints": "never", - "spanNulls": true, - "axisPlacement": "auto", - "stacking": {"group": "A", "mode": "normal"} - } - }, - "overrides": [ - { - "matcher": {"id": "byName", "options": "cash"}, - "properties": [ - {"id": "color", "value": {"mode": "fixed", "fixedColor": "#FADE2A"}}, - {"id": "displayName", "value": "Cash"} - ] - }, - { - "matcher": {"id": "byName", "options": "invested"}, - "properties": [ - {"id": "color", "value": {"mode": "fixed", "fixedColor": "#56A64B"}}, - {"id": "displayName", "value": "Invested"} - ] - } - ] - }, - "options": { - "legend": {"calcs": ["last"], "displayMode": "table", "placement": "bottom"}, - "tooltip": {"mode": "multi", "sort": "desc"} - }, - "targets": [ - { - "refId": "A", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "rawQuery": true, - "editorMode": "code", - "format": "time_series", - "rawSql": "SELECT d.valuation_date::timestamp AS \"time\", SUM(CASE WHEN a.account_type = 'WORKPLACE_PENSION' THEN 0 ELSE d.cash_balance END) AS cash, SUM(CASE WHEN a.account_type = 'WORKPLACE_PENSION' THEN d.cash_balance + d.investment_market_value ELSE d.investment_market_value END) AS invested FROM daily_account_valuation d JOIN accounts a ON a.id = d.account_id WHERE $__timeFilter(d.valuation_date) GROUP BY d.valuation_date ORDER BY d.valuation_date" - } - ] - }, - { - "id": 10, - "title": "Activity log", - "description": "Recent 
activities (BUY / SELL / DEPOSIT / WITHDRAWAL / DIVIDEND / etc.) across all accounts. Limited to 100 most recent.", - "type": "table", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "gridPos": {"h": 14, "w": 24, "x": 0, "y": 77}, - "fieldConfig": { - "defaults": { - "custom": {"align": "auto", "displayMode": "auto"} - }, - "overrides": [ - { - "matcher": {"id": "byName", "options": "amount"}, - "properties": [{"id": "unit", "value": "currencyGBP"}] - } - ] - }, - "options": { - "cellHeight": "sm", - "footer": {"show": false} - }, - "targets": [ - { - "refId": "A", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "rawQuery": true, - "editorMode": "code", - "format": "table", - "rawSql": "SELECT a.activity_date AS \"date\", acc.name AS \"account\", a.activity_type AS \"type\", a.asset_id AS \"asset\", a.quantity AS \"qty\", a.unit_price AS \"unit_price\", a.amount AS \"amount\", a.currency AS \"ccy\", a.notes AS \"notes\" FROM activities a LEFT JOIN accounts acc ON acc.id = a.account_id WHERE $__timeFilter(a.activity_date) ORDER BY a.activity_date DESC LIMIT 100" - } - ] - }, - { - "id": 11, - "title": "12mo return", - "description": "Modified-Dietz return over the trailing 12 months: market_gain / (nw_12mo_ago + 0.5 × contributions_12mo). 
Excludes new money in — answers 'how did my investments perform' rather than 'how much did my net worth change'.", - "type": "stat", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "gridPos": {"h": 4, "w": 4, "x": 20, "y": 0}, - "fieldConfig": { - "defaults": { - "unit": "percent", - "color": {"mode": "thresholds"}, - "decimals": 2, - "thresholds": { - "mode": "absolute", - "steps": [ - {"color": "red", "value": null}, - {"color": "yellow", "value": 0}, - {"color": "green", "value": 5} - ] - } - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "orientation": "auto", - "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, - "textMode": "auto" - }, - "targets": [ - { - "refId": "A", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "rawQuery": true, - "editorMode": "code", - "format": "table", - "rawSql": "WITH bounds AS (SELECT (SELECT MAX(valuation_date) FROM daily_account_valuation) AS d_now, (SELECT MIN(valuation_date) FROM daily_account_valuation WHERE valuation_date >= (SELECT MAX(valuation_date) - INTERVAL '12 months' FROM daily_account_valuation)) AS d_ago), agg AS (SELECT (SELECT SUM(total_value) FROM daily_account_valuation WHERE valuation_date = b.d_now) AS nw_now, (SELECT SUM(net_contribution) FROM daily_account_valuation WHERE valuation_date = b.d_now) AS contrib_now, (SELECT SUM(total_value) FROM daily_account_valuation WHERE valuation_date = b.d_ago) AS nw_ago, (SELECT SUM(net_contribution) FROM daily_account_valuation WHERE valuation_date = b.d_ago) AS contrib_ago FROM bounds b) SELECT ROUND((((nw_now - nw_ago - (contrib_now - contrib_ago)) / NULLIF(nw_ago + 0.5 * (contrib_now - contrib_ago), 0)) * 100)::numeric, 2) AS pct_12mo FROM agg" - } - ] - }, - { - "id": 12, - "title": "Yearly investment return %", - "description": "Modified-Dietz return per calendar year: market_gain / (nw_start + 0.5 × 
contributions). Pure investment performance — excludes new contributions, so a £100k vest doesn't show as 100% growth. Negative bars = market losses (e.g., 2022 bear market).", - "type": "barchart", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "gridPos": {"h": 11, "w": 24, "x": 0, "y": 45}, - "fieldConfig": { - "defaults": { - "color": {"mode": "thresholds"}, - "unit": "percent", - "decimals": 1, - "thresholds": { - "mode": "absolute", - "steps": [ - {"color": "red", "value": null}, - {"color": "yellow", "value": 0}, - {"color": "green", "value": 5} - ] - }, - "custom": { - "axisPlacement": "auto", - "axisLabel": "", - "fillOpacity": 80, - "gradientMode": "none", - "lineWidth": 1 - } - }, - "overrides": [ - { - "matcher": {"id": "byName", "options": "year"}, - "properties": [ - {"id": "unit", "value": "string"} - ] - } - ] - }, - "options": { - "barRadius": 0, - "barWidth": 0.6, - "groupWidth": 0.7, - "orientation": "auto", - "showValue": "always", - "stacking": "none", - "xField": "year", - "xTickLabelRotation": 0, - "legend": {"displayMode": "list", "placement": "bottom"}, - "tooltip": {"mode": "single", "sort": "none"} - }, - "targets": [ - { - "refId": "A", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "rawQuery": true, - "editorMode": "code", - "format": "table", - "rawSql": "WITH yearly AS (SELECT EXTRACT(YEAR FROM valuation_date)::int AS yr, valuation_date, SUM(total_value) AS nw, SUM(net_contribution) AS contrib FROM daily_account_valuation GROUP BY valuation_date), endpoints AS (SELECT yr, (array_agg(nw ORDER BY valuation_date ASC))[1] AS nw_start, (array_agg(nw ORDER BY valuation_date DESC))[1] AS nw_end, (array_agg(contrib ORDER BY valuation_date ASC))[1] AS contrib_start, (array_agg(contrib ORDER BY valuation_date DESC))[1] AS contrib_end FROM yearly GROUP BY yr) SELECT yr::text AS year, ROUND((((nw_end - nw_start - (contrib_end - contrib_start)) / NULLIF(nw_start + 0.5 * 
(contrib_end - contrib_start), 0)) * 100)::numeric, 2) AS return_pct FROM endpoints WHERE (nw_start + 0.5 * (contrib_end - contrib_start)) > 0 ORDER BY yr" - } - ] - }, - { - "id": 13, - "title": "Annual change decomposition — contributions vs market gain", - "description": "Each calendar year's net worth change split into 'new money in' (contributions − withdrawals) and 'market gain' (everything else: price appreciation, dividends, etc.). Shows whether you grew because you saved or because the market did the work. Negative bars = withdrawals or market losses.", - "type": "barchart", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "gridPos": {"h": 11, "w": 24, "x": 0, "y": 56}, - "fieldConfig": { - "defaults": { - "color": {"mode": "palette-classic"}, - "unit": "currencyGBP", - "decimals": 0, - "custom": { - "axisPlacement": "auto", - "axisLabel": "", - "fillOpacity": 80, - "gradientMode": "none", - "lineWidth": 1 - } - }, - "overrides": [ - { - "matcher": {"id": "byName", "options": "year"}, - "properties": [ - {"id": "unit", "value": "string"} - ] - }, - { - "matcher": {"id": "byName", "options": "contributions"}, - "properties": [ - {"id": "color", "value": {"mode": "fixed", "fixedColor": "blue"}}, - {"id": "displayName", "value": "Net contributions"} - ] - }, - { - "matcher": {"id": "byName", "options": "market_gain"}, - "properties": [ - {"id": "color", "value": {"mode": "fixed", "fixedColor": "#56A64B"}}, - {"id": "displayName", "value": "Market gain"} - ] - } - ] - }, - "options": { - "barRadius": 0, - "barWidth": 0.6, - "groupWidth": 0.7, - "orientation": "auto", - "showValue": "auto", - "stacking": "normal", - "xField": "year", - "xTickLabelRotation": 0, - "legend": {"calcs": ["sum"], "displayMode": "table", "placement": "bottom"}, - "tooltip": {"mode": "multi", "sort": "desc"} - }, - "targets": [ - { - "refId": "A", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "rawQuery": true, - 
"editorMode": "code", - "format": "table", - "rawSql": "WITH yearly AS (SELECT EXTRACT(YEAR FROM valuation_date)::int AS yr, valuation_date, SUM(total_value) AS nw, SUM(net_contribution) AS contrib FROM daily_account_valuation GROUP BY valuation_date), endpoints AS (SELECT yr, (array_agg(nw ORDER BY valuation_date ASC))[1] AS nw_start, (array_agg(nw ORDER BY valuation_date DESC))[1] AS nw_end, (array_agg(contrib ORDER BY valuation_date ASC))[1] AS contrib_start, (array_agg(contrib ORDER BY valuation_date DESC))[1] AS contrib_end FROM yearly GROUP BY yr) SELECT yr::text AS year, ROUND((contrib_end - contrib_start)::numeric, 0) AS contributions, ROUND((nw_end - nw_start - (contrib_end - contrib_start))::numeric, 0) AS market_gain FROM endpoints ORDER BY yr" - } - ] - }, - { - "id": 14, - "title": "Per-account ROI %", - "description": "(market value − net contribution) / net contribution × 100, latest snapshot. Excludes accounts with zero/negative net contribution (Schwab — RSU vests sold = negative contribution distorts the ratio). 
Pension shows 0% because Wealthfolio doesn't track underlying fund holdings, so cost_basis = 0 and 'growth' is just the cash balance reported.", - "type": "barchart", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "gridPos": {"h": 10, "w": 24, "x": 0, "y": 67}, - "fieldConfig": { - "defaults": { - "color": {"mode": "thresholds"}, - "unit": "percent", - "decimals": 1, - "thresholds": { - "mode": "absolute", - "steps": [ - {"color": "red", "value": null}, - {"color": "yellow", "value": 0}, - {"color": "green", "value": 10} - ] - }, - "custom": { - "axisPlacement": "auto", - "axisLabel": "", - "fillOpacity": 80, - "gradientMode": "none", - "lineWidth": 1 - } - }, - "overrides": [] - }, - "options": { - "barRadius": 0, - "barWidth": 0.6, - "groupWidth": 0.7, - "orientation": "horizontal", - "showValue": "always", - "stacking": "none", - "xField": "account", - "legend": {"displayMode": "list", "placement": "bottom"}, - "tooltip": {"mode": "single", "sort": "none"} - }, - "targets": [ - { - "refId": "A", - "datasource": {"type": "grafana-postgresql-datasource", "uid": "wealth-pg"}, - "rawQuery": true, - "editorMode": "code", - "format": "table", - "rawSql": "SELECT a.name AS account, ROUND(((d.total_value - d.net_contribution) / NULLIF(d.net_contribution, 0) * 100)::numeric, 2) AS roi_pct FROM daily_account_valuation d JOIN accounts a ON a.id = d.account_id WHERE d.valuation_date = (SELECT MAX(valuation_date) FROM daily_account_valuation) AND d.net_contribution > 0 ORDER BY roi_pct DESC" - } - ] - } - ], - "refresh": "5m", - "schemaVersion": 39, - "tags": ["finance", "personal", "wealth"], - "templating": {"list": []}, - "time": {"from": "now-5y", "to": "now"}, - "timepicker": {}, - "timezone": "browser", - "title": "Wealth", - "uid": "wealth", - "version": 1 -} diff --git a/stacks/monitoring/modules/monitoring/grafana.tf b/stacks/monitoring/modules/monitoring/grafana.tf index b5a5f249..2c5089ee 100644 --- 
a/stacks/monitoring/modules/monitoring/grafana.tf +++ b/stacks/monitoring/modules/monitoring/grafana.tf @@ -134,19 +134,9 @@ locals { # Applications "qbittorrent.json" = "Applications" "realestate-crawler.json" = "Applications" - "uk-payslip.json" = "Finance (Personal)" - "wealth.json" = "Finance (Personal)" + "uk-payslip.json" = "Finance" "job-hunter.json" = "Finance" - "fire-planner.json" = "Finance" } - - # Folders restricted to the Grafana admin user (anonymous Viewer + any future - # non-admin users are denied). Permission set by null_resource below via the - # Grafana folder permissions API after the dashboard sidecar auto-creates the - # folder. Server-admin always retains access regardless of folder ACL. - admin_only_folders = [ - "Finance (Personal)", - ] } resource "kubernetes_config_map" "grafana_dashboards" { @@ -167,60 +157,6 @@ resource "kubernetes_config_map" "grafana_dashboards" { } } -# Lock down "admin only" folders via Grafana folder permissions API. -# Default org-role inheritance gives Viewer + Editor read access to every -# folder; explicitly setting the folder ACL to {Admin: 4} overrides that -# inheritance so Viewer/Editor (incl. anonymous-Viewer) get no access. -# The Grafana super-admin (`admin` user) always retains access regardless. -resource "null_resource" "grafana_admin_only_folder_acl" { - for_each = toset(local.admin_only_folders) - - # Re-runs on tg apply (cheap, idempotent API call). Catches drift if anyone - # edits permissions via the UI or the folder is rebuilt. 
- triggers = { - folder = each.value - always = timestamp() - } - - provisioner "local-exec" { - interpreter = ["/bin/bash", "-c"] - command = <<-EOT - set -euo pipefail - FOLDER='${each.value}' - KUBECONFIG_FLAG='--kubeconfig ${var.kube_config_path}' - POD=$(kubectl $KUBECONFIG_FLAG get pod -n monitoring -l app.kubernetes.io/name=grafana -o jsonpath='{.items[0].metadata.name}') - ADMIN_PW=$(kubectl $KUBECONFIG_FLAG get secret -n monitoring grafana -o jsonpath='{.data.admin-password}' | base64 -d) - - # Wait up to 60s for the dashboard sidecar to materialise the folder. - for i in $(seq 1 12); do - FOLDER_UID=$(kubectl $KUBECONFIG_FLAG exec -n monitoring "$POD" -c grafana -- \ - curl -sf -u "admin:$ADMIN_PW" "http://localhost:3000/api/folders" \ - | python3 -c "import json,sys; folders=json.load(sys.stdin); print(next((f['uid'] for f in folders if f['title']==sys.argv[1]), ''))" "$FOLDER" || true) - if [ -n "$FOLDER_UID" ]; then break; fi - sleep 5 - done - - if [ -z "$FOLDER_UID" ]; then - echo "ERROR: folder '$FOLDER' not found in Grafana after 60s" - exit 1 - fi - - # Admin-only ACL. permission codes: 1=View, 2=Edit, 4=Admin. 
- kubectl $KUBECONFIG_FLAG exec -n monitoring "$POD" -c grafana -- \ - curl -sf -u "admin:$ADMIN_PW" -X POST \ - -H "Content-Type: application/json" \ - -d '{"items":[{"role":"Admin","permission":4}]}' \ - "http://localhost:3000/api/folders/$FOLDER_UID/permissions" >/dev/null - echo "set admin-only ACL on folder '$FOLDER' (uid=$FOLDER_UID)" - EOT - } - - depends_on = [ - helm_release.grafana, - kubernetes_config_map.grafana_dashboards, - ] -} - resource "helm_release" "grafana" { namespace = kubernetes_namespace.monitoring.metadata[0].name create_namespace = true diff --git a/stacks/monitoring/modules/monitoring/main.tf b/stacks/monitoring/modules/monitoring/main.tf index d55ac703..db0c798e 100644 --- a/stacks/monitoring/modules/monitoring/main.tf +++ b/stacks/monitoring/modules/monitoring/main.tf @@ -27,10 +27,6 @@ variable "grafana_admin_password" { type = string sensitive = true } -variable "kube_config_path" { - type = string - sensitive = true -} variable "tier" { type = string } variable "mysql_host" { type = string } variable "registry_user" { diff --git a/stacks/monitoring/modules/monitoring/prometheus.tf b/stacks/monitoring/modules/monitoring/prometheus.tf index c317775e..7e998b91 100644 --- a/stacks/monitoring/modules/monitoring/prometheus.tf +++ b/stacks/monitoring/modules/monitoring/prometheus.tf @@ -40,11 +40,8 @@ resource "helm_release" "prometheus" { # version = "15.0.2" version = "25.8.2" - timeout = 900 # 15 min — Recreate strategy + iSCSI reattach is slow - # force_update disabled 2026-04-23: caused Helm to try replacing the bound - # pushgateway PVC (added in rev 188, see commit e51c104), which is immutable. - # Re-enable temporarily only when a StatefulSet volumeClaimTemplate change needs --force. 
- force_update = false + timeout = 900 # 15 min — Recreate strategy + iSCSI reattach is slow + force_update = true # Required for StatefulSet volumeClaimTemplate changes (immutable field) values = [templatefile("${path.module}/prometheus_chart_values.tpl", { alertmanager_mail_pass = var.alertmanager_account_password, alertmanager_slack_api_url = var.alertmanager_slack_api_url, tuya_api_key = var.tiny_tuya_service_secret, haos_api_token = var.haos_api_token })] } diff --git a/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl b/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl index 0051982e..b0233985 100755 --- a/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl +++ b/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl @@ -73,7 +73,7 @@ alertmanager: - source_matchers: - alertname = NodeDown target_matchers: - - alertname =~ "NodeNotReady|NodeConditionBad|PodCrashLooping|ContainerOOMKilled|DeploymentReplicasMismatch|StatefulSetReplicasMismatch|DaemonSetMissingPods|ScrapeTargetDown|NodeLowFreeMemory|PostgreSQLDown|RedisDown|HeadscaleDown|HeadscaleReplicasMismatch|AuthentikDown|PoisonFountainDown|HackmdDown|PrivatebinDown|MailServerDown|EmailRoundtripFailing|EmailRoundtripStale|NodeExporterDown|DockerRegistryDown|HomeAssistantDown|CloudflaredDown|TechnitiumDNSDown|iDRACRedfishMetricsMissing|iDRACSNMPMetricsMissing|HomeAssistantMetricsMissing" + - alertname =~ "NodeNotReady|NodeConditionBad|PodCrashLooping|ContainerOOMKilled|DeploymentReplicasMismatch|StatefulSetReplicasMismatch|DaemonSetMissingPods|ScrapeTargetDown|NodeLowFreeMemory|PostgreSQLDown|RedisDown|HeadscaleDown|AuthentikDown|PoisonFountainDown|HackmdDown|PrivatebinDown|MailServerDown|EmailRoundtripFailing|EmailRoundtripStale|NodeExporterDown|DockerRegistryDown|HomeAssistantDown|CloudflaredDown|TechnitiumDNSDown|iDRACRedfishMetricsMissing|iDRACSNMPMetricsMissing|HomeAssistantMetricsMissing" # NFS down causes mass pod failures and NFS-dependent service 
outages - source_matchers: - alertname = NFSServerUnresponsive @@ -98,7 +98,7 @@ alertmanager: - source_matchers: - alertname = PowerOutage target_matchers: - - alertname =~ "NodeDown|NFSServerUnresponsive|NodeExporterDown|CloudflaredDown|MetalLBSpeakerDown|MetalLBControllerDown|UPSMetricsMissing|iDRACRedfishMetricsMissing|iDRACSNMPMetricsMissing|ATSMetricsMissing|HomeAssistantMetricsMissing|FuseMainMetricsMissing|FuseGarageMetricsMissing|ThermostatHolMetricsMissing|ThermostatMasterBedroomMetricsMissing|ThermostatOfficeMetricsMissing|ThermostatKidsRoomMetricsMissing|ProxmoxMetricsMissing|iDRACSystemUnhealthy|iDRACServerPoweredOff|ProxmoxExporterDown" + - alertname =~ "NodeDown|NFSServerUnresponsive|NodeExporterDown|CloudflaredDown|MetalLBSpeakerDown|MetalLBControllerDown|UPSMetricsMissing|iDRACRedfishMetricsMissing|iDRACSNMPMetricsMissing|ATSMetricsMissing|HomeAssistantMetricsMissing|FuseMainMetricsMissing|FuseGarageMetricsMissing|ProxmoxMetricsMissing|iDRACSystemUnhealthy|iDRACServerPoweredOff|ProxmoxExporterDown" # iDRAC system-level unhealthy suppresses component-level alerts - source_matchers: - alertname = iDRACSystemUnhealthy @@ -113,11 +113,6 @@ alertmanager: - alertname = FuseGarageFault target_matchers: - alertname = FuseGarageMetricsMissing - # Tuya Cloud API down suppresses all per-device metrics-missing alerts - - source_matchers: - - alertname = TuyaCloudDown - target_matchers: - - alertname =~ "ATSMetricsMissing|FuseMainMetricsMissing|FuseGarageMetricsMissing|ThermostatHolMetricsMissing|ThermostatMasterBedroomMetricsMissing|ThermostatOfficeMetricsMissing|ThermostatKidsRoomMetricsMissing" # Containerd broken suppresses downstream pod alerts - source_matchers: - alertname = KubeletImagePullErrors @@ -165,27 +160,6 @@ prometheus-node-exporter: memory: 100Mi limits: memory: 100Mi -# NOTE: The parent chart forwards subchart values under `prometheus-pushgateway:`, -# not `pushgateway:` — using the wrong key silently no-ops. 
-prometheus-pushgateway: - # Without persistence the pushgateway's in-memory metrics are lost on restart. - # Once-per-day pushers (offsite-backup-sync) stay invisible until their next run, - # which is why backup_last_success_timestamp{job="offsite-backup-sync"} vanished - # after the 2026-04-22 node3 kubelet hiccup. - persistentVolume: - enabled: true - size: 2Gi - storageClass: proxmox-lvm-encrypted - mountPath: /data - extraArgs: - - --persistence.file=/data/pushgateway.bin - - --persistence.interval=1m - resources: - requests: - cpu: 10m - memory: 64Mi - limits: - memory: 256Mi server: # Enable me to delete metrics extraFlags: @@ -752,7 +726,6 @@ serverFiles: for: 30m labels: severity: info - subsystem: gpu annotations: summary: "GPU power: {{ $value | printf \"%.0f\" }}W (threshold: 50W)" - alert: HighUtilization @@ -776,14 +749,6 @@ serverFiles: severity: critical annotations: summary: "NVIDIA GPU exporter is down - no GPU metrics available" - - alert: GPUNodeUnschedulable - expr: kube_node_spec_unschedulable{node="k8s-node1"} == 1 - for: 5m - labels: - severity: critical - subsystem: gpu - annotations: - summary: "GPU node {{ $labels.node }} is cordoned — Frigate and GPU workloads cannot schedule" - name: Power rules: - alert: OnBattery @@ -812,7 +777,6 @@ serverFiles: for: 60m labels: severity: info - subsystem: r730 annotations: summary: "Server power: {{ $value | printf \"%.0f\" }}W (threshold: 300W)" - alert: UsingInverterEnergyForTooLong @@ -950,83 +914,6 @@ serverFiles: severity: critical annotations: summary: "Garage fuse panel fault detected" - - alert: FuseMainHighLeakage - expr: fuse_main_leakage_current > 30 - for: 5m - labels: - severity: critical - annotations: - summary: "Main fuse leakage current: {{ $value }}mA (threshold: 30mA)" - - alert: FuseGarageHighLeakage - expr: fuse_garage_leakage_current > 30 - for: 5m - labels: - severity: critical - annotations: - summary: "Garage fuse leakage current: {{ $value }}mA (threshold: 30mA)" - - alert: 
FuseMainOvertemperature - expr: fuse_main_temperature > 70 - for: 5m - labels: - severity: warning - annotations: - summary: "Main fuse temperature: {{ $value }}°C (threshold: 70°C)" - - alert: FuseGarageOvertemperature - expr: fuse_garage_temperature > 70 - for: 5m - labels: - severity: warning - annotations: - summary: "Garage fuse temperature: {{ $value }}°C (threshold: 70°C)" - - alert: FuseMainVoltageAbnormal - expr: fuse_main_voltage / 10 < 200 or fuse_main_voltage / 10 > 260 - for: 5m - labels: - severity: critical - annotations: - summary: "Main fuse voltage: {{ $value }}V (expected 200-260V)" - - alert: FuseGarageVoltageAbnormal - expr: fuse_garage_voltage / 10 < 200 or fuse_garage_voltage / 10 > 260 - for: 5m - labels: - severity: critical - annotations: - summary: "Garage fuse voltage: {{ $value }}V (expected 200-260V)" - - name: Thermostats - rules: - - alert: ThermostatOverheating - expr: > - thermostat_hol_temp_current > 400 - or thermostat_master_bedroom_temp_current > 400 - or thermostat_office_temp_current > 400 - or thermostat_kids_room_temp_current > 400 - for: 10m - labels: - severity: warning - annotations: - summary: "Thermostat temperature {{ $value | printf \"%.1f\" }} (x10 °C) exceeds 40°C" - - alert: ThermostatFreezing - expr: > - thermostat_hol_temp_current < 50 - or thermostat_master_bedroom_temp_current < 50 - or thermostat_office_temp_current < 50 - or thermostat_kids_room_temp_current < 50 - for: 15m - labels: - severity: critical - annotations: - summary: "Thermostat temperature {{ $value | printf \"%.1f\" }} (x10 °C) below 5°C — risk of freezing" - - alert: ThermostatHumidityHigh - expr: > - thermostat_hol_humidity > 80 - or thermostat_master_bedroom_humidity > 80 - or thermostat_office_humidity > 80 - or thermostat_kids_room_humidity > 80 - for: 30m - labels: - severity: warning - annotations: - summary: "Thermostat humidity {{ $value }}% exceeds 80%" - name: Metric Staleness rules: - alert: UPSMetricsMissing @@ -1071,41 +958,6 @@ 
serverFiles: severity: warning annotations: summary: "Fuse garage panel metrics missing for 15m - check tuya-bridge pod" - - alert: ThermostatHolMetricsMissing - expr: absent(thermostat_hol_temp_current) - for: 15m - labels: - severity: warning - annotations: - summary: "Thermostat hol metrics missing for 15m - check tuya-bridge pod" - - alert: ThermostatMasterBedroomMetricsMissing - expr: absent(thermostat_master_bedroom_temp_current) - for: 15m - labels: - severity: warning - annotations: - summary: "Thermostat master bedroom metrics missing for 15m - check tuya-bridge pod" - - alert: ThermostatOfficeMetricsMissing - expr: absent(thermostat_office_temp_current) - for: 15m - labels: - severity: warning - annotations: - summary: "Thermostat office metrics missing for 15m - check tuya-bridge pod" - - alert: ThermostatKidsRoomMetricsMissing - expr: absent(thermostat_kids_room_temp_current) - for: 15m - labels: - severity: warning - annotations: - summary: "Thermostat kids room metrics missing for 15m - check tuya-bridge pod" - - alert: TuyaCloudDown - expr: count(({__name__=~".*_tuya_cloud_up"}) == 0) > 0 - for: 5m - labels: - severity: warning - annotations: - summary: "Tuya Cloud API rejecting calls ({{ $value }} devices affected) — renew subscription at iot.tuya.com (code 28841002 = expired trial) or rotate TINYTUYA_API_KEY" - alert: ProxmoxMetricsMissing expr: absent(pve_up) for: 10m @@ -1219,14 +1071,6 @@ serverFiles: severity: warning annotations: summary: "Home Assistant down: {{ $labels.instance }}" - - alert: HomeAssistantCriticalSensorUnavailable - expr: haos_entity_available{entity=~"sensor\\.(tesla_t4_gpu_(temperature|power_usage|utilization|memory_used)|r730_(cpu_temperature|power_consumption|power_supply_input_voltage_[12]|system_board_(exhaust|inlet)_temperature)|ups_(input_voltage|output_voltage|load|battery_remaining|output_source))"} == 0 - for: 15m - labels: - severity: critical - annotations: - summary: "HA sensor unavailable: {{ 
$labels.friendly_name }} ({{ $labels.entity }})" - description: "{{ $labels.entity }} on {{ $labels.instance }} has been unavailable for 15+ minutes. Common cause: REST sensor needs HA restart (reload_all doesn't rebuild rest: platform). Verify exporter endpoint from HA: `ssh vbarzin@192.168.1.8` → `curl -sk `. Fix: `curl -X POST -H \"Authorization: Bearer $HOME_ASSISTANT_SOFIA_TOKEN\" $HOME_ASSISTANT_SOFIA_URL/api/services/homeassistant/restart`." - alert: CoreDNSErrors expr: rate(coredns_dns_responses_total{rcode="SERVFAIL"}[5m]) > 1 and on() (time() - process_start_time_seconds{job="prometheus"}) > 900 for: 10m @@ -1311,28 +1155,6 @@ serverFiles: severity: critical annotations: summary: "Vault backup CronJob has never completed successfully" - - alert: VaultRaftLeaderStuck - expr: | - (vault_core_active == 1) - and on(instance) - (rate(vault_raft_last_index_gauge[5m]) == 0) - for: 2m - labels: - severity: critical - annotations: - summary: "Vault raft leader {{ $labels.instance }} is active but commit index has not advanced for >2m" - description: "The raft leader is reachable on TCP but its commit index has stalled — likely a stuck goroutine hang (see 2026-04-22 post-mortem). External /v1/sys/health will be 503. Recovery: graceful delete of the stuck pod (see docs/runbooks/vault-raft-leader-deadlock.md). NOTE: silent until vault telemetry + scrape job are enabled." - - alert: VaultHAStatusUnavailable - expr: | - (count(up{job="vault"} == 1) > 0) - and - (count(vault_core_active == 1) == 0) - for: 5m - labels: - severity: critical - annotations: - summary: "Vault pods are Up but no pod reports HA active leader" - description: "At least one Vault pod is scraping healthy, but no pod has vault_core_active=1. HA layer is broken — external endpoint will be 503 even though the pods themselves are alive. See docs/runbooks/vault-raft-leader-deadlock.md. NOTE: silent until vault telemetry + scrape job are enabled." 
- alert: VaultwardenBackupStale expr: (time() - kube_cronjob_status_last_successful_time{cronjob="vaultwarden-backup", namespace="vaultwarden"}) > 86400 for: 30m @@ -1425,13 +1247,12 @@ serverFiles: annotations: summary: "Backup job failed: {{ $labels.namespace }}/{{ $labels.job_name }}" - alert: LVMSnapshotStale - expr: (time() - lvm_snapshot_last_run_timestamp{job="lvm-pvc-snapshot"}) > 108000 + expr: (time() - lvm_snapshot_last_run_timestamp{job="lvm-pvc-snapshot"}) > 172800 for: 30m labels: severity: critical annotations: summary: "LVM PVC snapshots are {{ $value | humanizeDuration }} old (expected daily)" - description: "Timer lvm-pvc-snapshot.timer on 192.168.1.127 hasn't pushed fresh metrics. Runbook: docs/runbooks/restore-lvm-snapshot.md" - alert: LVMSnapshotNeverRun expr: absent(lvm_snapshot_last_run_timestamp{job="lvm-pvc-snapshot"}) for: 48h @@ -1590,7 +1411,7 @@ serverFiles: severity: warning annotations: summary: "Redis master {{ $labels.pod }} has only {{ $value }} connected replicas (expected 2)" - - alert: HeadscaleReplicasMismatch + - alert: HeadscaleDown expr: (kube_deployment_status_replicas_available{namespace="headscale"} or on() vector(0)) < 1 for: 5m labels: @@ -1994,7 +1815,7 @@ serverFiles: summary: "Email round-trip probe failing. Check MX DNS, Postfix, Mailgun API, and IMAP." 
- alert: EmailRoundtripStale expr: (time() - email_roundtrip_last_success_timestamp{job="email-roundtrip-monitor"}) > 3600 - for: 20m + for: 10m labels: severity: warning annotations: @@ -2159,7 +1980,7 @@ serverFiles: annotations: summary: "Technitium zone-sync has not run successfully in >1h (last: {{ $value | humanizeDuration }} ago)" - alert: TechnitiumZoneCountMismatch - expr: (max(technitium_zone_count{instance!="primary"}) - min(technitium_zone_count{instance!="primary"})) > 0 + expr: (max(technitium_zone_count) - min(technitium_zone_count)) > 0 for: 15m labels: severity: warning @@ -2174,6 +1995,13 @@ serverFiles: summary: "CoreDNS forward SERVFAIL/REFUSED rate: {{ $value | printf \"%.2f\" }}/s — upstream DNS (pfSense/public) may be unhealthy" - name: qbittorrent rules: + - alert: MAMMouseClass + expr: mam_class_code == 0 + for: 1h + labels: + severity: critical + annotations: + summary: "MAM account is in Mouse class — tracker is refusing announces, ratio cannot recover" - alert: MAMCookieExpired expr: mam_farming_cookie_expired > 0 for: 0m @@ -2212,6 +2040,13 @@ serverFiles: severity: critical annotations: summary: "qBittorrent is disconnected from the network" + - alert: QBittorrentMAMUnsatisfied + expr: qbt_tracker_unsatisfied{tracker="mam"} > 15 + for: 10m + labels: + severity: warning + annotations: + summary: "{{ $value | printf \"%.0f\" }} MAM torrents not yet seeded 72h (limit: 20 for new members)" - name: Headscale VPN rules: @@ -2526,58 +2361,6 @@ extraScrapeConfigs: | action: replace regex: '(.*)' replacement: 'fuse_main_$${1}' - - job_name: 'thermostat-hol' - static_configs: - - targets: - - "tuya-bridge.tuya-bridge.svc.cluster.local:80" - metrics_path: '/metrics/bf7efce9519bd508df431s' - params: - api-key: ['${tuya_api_key}'] - metric_relabel_configs: - - source_labels: [ __name__ ] - target_label: '__name__' - action: replace - regex: '(.*)' - replacement: 'thermostat_hol_$${1}' - - job_name: 'thermostat-master-bedroom' - static_configs: - - 
targets: - - "tuya-bridge.tuya-bridge.svc.cluster.local:80" - metrics_path: '/metrics/bf70e80159641f61a5lzho' - params: - api-key: ['${tuya_api_key}'] - metric_relabel_configs: - - source_labels: [ __name__ ] - target_label: '__name__' - action: replace - regex: '(.*)' - replacement: 'thermostat_master_bedroom_$${1}' - - job_name: 'thermostat-office' - static_configs: - - targets: - - "tuya-bridge.tuya-bridge.svc.cluster.local:80" - metrics_path: '/metrics/bf9597a0064f0349d4b09x' - params: - api-key: ['${tuya_api_key}'] - metric_relabel_configs: - - source_labels: [ __name__ ] - target_label: '__name__' - action: replace - regex: '(.*)' - replacement: 'thermostat_office_$${1}' - - job_name: 'thermostat-kids-room' - static_configs: - - targets: - - "tuya-bridge.tuya-bridge.svc.cluster.local:80" - metrics_path: '/metrics/bfe64da91577117e0annt5' - params: - api-key: ['${tuya_api_key}'] - metric_relabel_configs: - - source_labels: [ __name__ ] - target_label: '__name__' - action: replace - regex: '(.*)' - replacement: 'thermostat_kids_room_$${1}' - job_name: 'haos' static_configs: - targets: diff --git a/stacks/nextcloud/main.tf b/stacks/nextcloud/main.tf index d737fa5c..14a5122d 100644 --- a/stacks/nextcloud/main.tf +++ b/stacks/nextcloud/main.tf @@ -493,25 +493,6 @@ resource "kubernetes_cron_job_v1" "nextcloud-backup" { spec { restart_policy = "OnFailure" - # Backup mounts the same RWO PVC (proxmox-lvm-encrypted) as the - # main nextcloud pod, so it MUST schedule on the same node — the - # volume cannot attach to two nodes simultaneously. Without this - # the backup pod is stuck in ContainerCreating until cron retries. 
- affinity { - pod_affinity { - required_during_scheduling_ignored_during_execution { - label_selector { - match_labels = { - "app.kubernetes.io/name" = "nextcloud" - "app.kubernetes.io/instance" = "nextcloud" - } - } - topology_key = "kubernetes.io/hostname" - namespaces = [kubernetes_namespace.nextcloud.metadata[0].name] - } - } - } - container { name = "backup" image = "alpine:latest" diff --git a/stacks/nvidia/modules/nvidia/main.tf b/stacks/nvidia/modules/nvidia/main.tf index 720f6daf..f11bd2c3 100644 --- a/stacks/nvidia/modules/nvidia/main.tf +++ b/stacks/nvidia/modules/nvidia/main.tf @@ -63,25 +63,18 @@ resource "kubernetes_resource_quota" "nvidia_quota" { } } -# Apply GPU taint dynamically based on NFD-discovered GPU nodes. The -# NFD label `feature.node.kubernetes.io/pci-10de.present=true` is -# auto-applied on any node with an NVIDIA PCI device (vendor 0x10de), -# so the taint follows the card if it moves between nodes. Workload -# nodeSelectors key off `nvidia.com/gpu.present=true` (applied by -# gpu-feature-discovery once the operator is up). 
+# Apply GPU taint and label to ensure only GPU workloads run on GPU node resource "null_resource" "gpu_node_config" { provisioner "local-exec" { command = <<-EOT - set -euo pipefail - for node in $(kubectl get nodes -l feature.node.kubernetes.io/pci-10de.present=true -o jsonpath='{.items[*].metadata.name}'); do - kubectl taint nodes "$node" nvidia.com/gpu=true:PreferNoSchedule --overwrite - done + kubectl taint nodes k8s-node1 nvidia.com/gpu=true:PreferNoSchedule --overwrite + kubectl label nodes k8s-node1 gpu=true --overwrite EOT } + # Re-run if namespace changes (proxy for cluster changes) triggers = { - namespace = kubernetes_namespace.nvidia.metadata[0].name - command_hash = "dynamic-taint-v1" + namespace = kubernetes_namespace.nvidia.metadata[0].name } } @@ -148,7 +141,7 @@ resource "kubernetes_deployment" "nvidia-exporter" { } spec { node_selector = { - "nvidia.com/gpu.present" : "true" + "gpu" : "true" } toleration { key = "nvidia.com/gpu" @@ -611,7 +604,7 @@ resource "kubernetes_daemonset" "gpu_pod_exporter" { service_account_name = kubernetes_service_account.gpu_pod_exporter.metadata[0].name node_selector = { - "nvidia.com/gpu.present" : "true" + "gpu" : "true" } toleration { diff --git a/stacks/paperless-ngx/main.tf b/stacks/paperless-ngx/main.tf index 4bafe6ce..bceafaf2 100644 --- a/stacks/paperless-ngx/main.tf +++ b/stacks/paperless-ngx/main.tf @@ -86,28 +86,6 @@ resource "kubernetes_persistent_volume_claim" "data_proxmox" { } } -resource "kubernetes_persistent_volume_claim" "data_encrypted" { - wait_until_bound = false - metadata { - name = "paperless-ngx-data-encrypted" - namespace = kubernetes_namespace.paperless-ngx.metadata[0].name - annotations = { - "resize.topolvm.io/threshold" = "80%" - "resize.topolvm.io/increase" = "100%" - "resize.topolvm.io/storage_limit" = "5Gi" - } - } - spec { - access_modes = ["ReadWriteOnce"] - storage_class_name = "proxmox-lvm-encrypted" - resources { - requests = { - storage = "1Gi" - } - } - } -} - resource 
"kubernetes_deployment" "paperless-ngx" { metadata { @@ -218,7 +196,7 @@ resource "kubernetes_deployment" "paperless-ngx" { volume { name = "data" persistent_volume_claim { - claim_name = kubernetes_persistent_volume_claim.data_encrypted.metadata[0].name + claim_name = kubernetes_persistent_volume_claim.data_proxmox.metadata[0].name } } } diff --git a/stacks/payslip-ingest/main.tf b/stacks/payslip-ingest/main.tf index 8c313c25..7e4d0006 100644 --- a/stacks/payslip-ingest/main.tf +++ b/stacks/payslip-ingest/main.tf @@ -32,20 +32,7 @@ resource "kubernetes_namespace" "payslip_ingest" { # Seed these manually in Vault before applying: # secret/paperless-ngx -> property `api_token` # secret/claude-agent-service -> property `api_bearer_token` -# secret/payslip-ingest -> properties: -# - `webhook_bearer_token` -# - `actualbudget_api_key` (same value as -# actualbudget-http-api-viktor random -# api-key — fetch via `kubectl get pods -# -n actualbudget -l -# app=actualbudget-http-api-viktor -o -# jsonpath={.items[0].spec.containers[0].env}` -# and grep API_KEY) -# - `actualbudget_encryption_password` -# (same as Viktor's budget password in -# secret/actualbudget/credentials[viktor]) -# - `actualbudget_budget_sync_id` -# (same as Viktor's sync_id) +# secret/payslip-ingest -> property `webhook_bearer_token` resource "kubernetes_manifest" "external_secret" { manifest = { apiVersion = "external-secrets.io/v1beta1" @@ -92,27 +79,6 @@ resource "kubernetes_manifest" "external_secret" { property = "webhook_bearer_token" } }, - { - secretKey = "ACTUALBUDGET_API_KEY" - remoteRef = { - key = "payslip-ingest" - property = "actualbudget_api_key" - } - }, - { - secretKey = "ACTUALBUDGET_ENCRYPTION_PASSWORD" - remoteRef = { - key = "payslip-ingest" - property = "actualbudget_encryption_password" - } - }, - { - secretKey = "ACTUALBUDGET_BUDGET_SYNC_ID" - remoteRef = { - key = "payslip-ingest" - property = "actualbudget_budget_sync_id" - } - }, ] } } @@ -322,85 +288,6 @@ resource 
"kubernetes_service" "payslip_ingest" { } } -# Daily sync of Meta payroll deposits from ActualBudget's http-api sidecar. -# Populates payslip_ingest.external_meta_deposits so Panel 14 can overlay bank -# deposits against payslip.net_pay — catches parser drift on net_pay. -resource "kubernetes_cron_job_v1" "actualbudget_payroll_sync" { - metadata { - name = "actualbudget-payroll-sync" - namespace = kubernetes_namespace.payslip_ingest.metadata[0].name - } - spec { - schedule = "0 2 * * *" - concurrency_policy = "Forbid" - successful_jobs_history_limit = 3 - failed_jobs_history_limit = 5 - starting_deadline_seconds = 300 - - job_template { - metadata { - labels = local.labels - } - spec { - backoff_limit = 1 - ttl_seconds_after_finished = 86400 - template { - metadata { - labels = local.labels - } - spec { - restart_policy = "OnFailure" - image_pull_secrets { - name = "registry-credentials" - } - container { - name = "sync" - image = local.image - command = ["python", "-m", "payslip_ingest", "sync-meta-deposits"] - - env_from { - secret_ref { - name = "payslip-ingest-secrets" - } - } - env_from { - secret_ref { - name = "payslip-ingest-db-creds" - } - } - - env { - name = "ACTUALBUDGET_HTTP_API_URL" - value = "http://budget-http-api-viktor.actualbudget.svc.cluster.local" - } - - resources { - requests = { - cpu = "50m" - memory = "128Mi" - } - limits = { - memory = "256Mi" - } - } - } - } - } - } - } - } - - lifecycle { - # KYVERNO_LIFECYCLE_V1 - ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config] - } - - depends_on = [ - kubernetes_manifest.external_secret, - kubernetes_manifest.db_external_secret, - ] -} - # Plan-time read of the ESO-created K8s Secret for Grafana datasource password. # First apply: -target=kubernetes_manifest.db_external_secret first so the Secret exists. 
data "kubernetes_secret" "payslip_ingest_db_creds" { diff --git a/stacks/poison-fountain/main.tf b/stacks/poison-fountain/main.tf index 870a1675..64e7ae21 100644 --- a/stacks/poison-fountain/main.tf +++ b/stacks/poison-fountain/main.tf @@ -219,10 +219,6 @@ module "ingress" { skip_default_rate_limit = true exclude_crowdsec = true anti_ai_scraping = false - # Deployment is scaled to 0 (see replicas above). Opt the ingress out of - # Uptime Kuma external monitoring so the sync CronJob deletes the orphaned - # `[External] poison` monitor instead of flapping DOWN. - external_monitor = false extra_annotations = { "gethomepage.dev/enabled" = "true" "gethomepage.dev/name" = "Poison Fountain" @@ -256,13 +252,6 @@ resource "kubernetes_cron_job_v1" "poison_fetcher" { name = "poison-fountain-fetcher" } spec { - security_context { - # curlimages/curl defaults to uid 100, but the NFS mount at /data is - # owned root:root 755 (writes from the main Deployment which runs as - # root). Align the CronJob with the Deployment so mkdir /data/cache - # succeeds. no_root_squash is set on the /srv/nfs export. - run_as_user = 0 - } container { name = "fetcher" image = "curlimages/curl:latest" diff --git a/stacks/poison-fountain/providers.tf b/stacks/poison-fountain/providers.tf index 012af700..b337a2e9 100644 --- a/stacks/poison-fountain/providers.tf +++ b/stacks/poison-fountain/providers.tf @@ -9,10 +9,6 @@ terraform { source = "cloudflare/cloudflare" version = "~> 4" } - authentik = { - source = "goauthentik/authentik" - version = "~> 2024.10" - } } } diff --git a/stacks/redis/modules/redis/main.tf b/stacks/redis/modules/redis/main.tf index c7eb9245..91a938bc 100644 --- a/stacks/redis/modules/redis/main.tf +++ b/stacks/redis/modules/redis/main.tf @@ -43,7 +43,7 @@ resource "kubernetes_config_map" "haproxy" { timeout connect 5s timeout client 30s timeout server 30s - timeout check 5s + timeout check 3s # Dynamic DNS resolution via cluster CoreDNS. 
Without this, haproxy # resolves server hostnames once at startup and caches forever, so @@ -82,9 +82,9 @@ resource "kubernetes_config_map" "haproxy" { tcp-check expect rstring role:master tcp-check send "QUIT\r\n" tcp-check expect string +OK - server redis-v2-0 redis-v2-0.redis-v2-headless.redis.svc.cluster.local:6379 check inter 2s fall 3 rise 2 resolvers kubernetes init-addr last,libc,none - server redis-v2-1 redis-v2-1.redis-v2-headless.redis.svc.cluster.local:6379 check inter 2s fall 3 rise 2 resolvers kubernetes init-addr last,libc,none - server redis-v2-2 redis-v2-2.redis-v2-headless.redis.svc.cluster.local:6379 check inter 2s fall 3 rise 2 resolvers kubernetes init-addr last,libc,none + server redis-v2-0 redis-v2-0.redis-v2-headless.redis.svc.cluster.local:6379 check inter 1s fall 2 rise 2 resolvers kubernetes init-addr last,libc,none + server redis-v2-1 redis-v2-1.redis-v2-headless.redis.svc.cluster.local:6379 check inter 1s fall 2 rise 2 resolvers kubernetes init-addr last,libc,none + server redis-v2-2 redis-v2-2.redis-v2-headless.redis.svc.cluster.local:6379 check inter 1s fall 2 rise 2 resolvers kubernetes init-addr last,libc,none backend redis_sentinel balance roundrobin @@ -362,8 +362,8 @@ resource "kubernetes_config_map" "redis_v2_sentinel_bootstrap" { sentinel resolve-hostnames yes sentinel announce-hostnames yes sentinel monitor mymaster $MASTER_HOST 6379 2 - sentinel down-after-milliseconds mymaster 15000 - sentinel failover-timeout mymaster 60000 + sentinel down-after-milliseconds mymaster 5000 + sentinel failover-timeout mymaster 30000 sentinel parallel-syncs mymaster 1 EOF @@ -396,7 +396,7 @@ resource "kubernetes_service" "redis_v2_headless" { } spec { cluster_ip = "None" - publish_not_ready_addresses = false + publish_not_ready_addresses = true selector = { app = "redis-v2" } @@ -451,15 +451,18 @@ resource "kubernetes_stateful_set_v1" "redis_v2" { affinity { pod_anti_affinity { - required_during_scheduling_ignored_during_execution { - 
label_selector { - match_expressions { - key = "app" - operator = "In" - values = ["redis-v2"] + preferred_during_scheduling_ignored_during_execution { + weight = 100 + pod_affinity_term { + label_selector { + match_expressions { + key = "app" + operator = "In" + values = ["redis-v2"] + } } + topology_key = "kubernetes.io/hostname" } - topology_key = "kubernetes.io/hostname" } } } @@ -532,8 +535,8 @@ resource "kubernetes_stateful_set_v1" "redis_v2" { } initial_delay_seconds = 15 period_seconds = 10 - timeout_seconds = 10 - failure_threshold = 5 + timeout_seconds = 3 + failure_threshold = 3 } readiness_probe { exec { @@ -577,8 +580,8 @@ resource "kubernetes_stateful_set_v1" "redis_v2" { } initial_delay_seconds = 20 period_seconds = 10 - timeout_seconds = 10 - failure_threshold = 5 + timeout_seconds = 3 + failure_threshold = 3 } readiness_probe { exec { diff --git a/stacks/technitium/modules/technitium/ha.tf b/stacks/technitium/modules/technitium/ha.tf index 71097afe..bd90cbfa 100644 --- a/stacks/technitium/modules/technitium/ha.tf +++ b/stacks/technitium/modules/technitium/ha.tf @@ -434,17 +434,12 @@ resource "kubernetes_cron_job_v1" "technitium_zone_sync" { while read -r zone; do if grep -qx "$zone" /tmp/replica_zones.txt; then - # Zone exists — reconcile primaryNameServerAddresses to the - # stable FQDN before resync. Without this, a zone created - # against an old pod IP (pre-service-ClusterIP era) stays - # pinned to that dead IP forever and zone transfers fail - # silently. Idempotent — Technitium accepts identical values. 
- curl -sf "$REPLICA/api/zones/options/set?token=$R_TOKEN&zone=$zone&primaryNameServerAddresses=$PRIMARY_HOST" > /dev/null || true + # Zone exists — just resync curl -sf "$REPLICA/api/zones/resync?token=$R_TOKEN&zone=$zone" > /dev/null || true else # New zone — create as Secondary and validate response echo "NEW: Creating $zone on $REPLICA" - RESP=$(curl -sf "$REPLICA/api/zones/create?token=$R_TOKEN&zone=$zone&type=Secondary&primaryNameServerAddresses=$PRIMARY_HOST" || echo '{"status":"error"}') + RESP=$(curl -sf "$REPLICA/api/zones/create?token=$R_TOKEN&zone=$zone&type=Secondary&primaryNameServerAddresses=$PRIMARY_IP" || echo '{"status":"error"}') if echo "$RESP" | grep -q '"status":"ok"'; then SYNCED=$((SYNCED + 1)) else @@ -491,14 +486,7 @@ resource "kubernetes_cron_job_v1" "technitium_zone_sync" { value = var.technitium_password } env { - # Service ClusterIP — Terraform tracks it on every apply, and the - # reconcile loop below re-applies it to every existing zone on - # every run (*/30m), so any drift (e.g. service recreate → new - # ClusterIP, or historical pod-IP values still pinned on replicas) - # self-heals within a sync cycle. Hostname form was tried but - # Technitium's own resolver doesn't forward svc.cluster.local, - # so `primaryNameServerAddresses` must be a literal IP. 
- name = "PRIMARY_HOST" + name = "PRIMARY_IP" value = kubernetes_service.technitium_primary.spec[0].cluster_ip } } diff --git a/stacks/traefik/modules/traefik/main.tf b/stacks/traefik/modules/traefik/main.tf index 14d0e907..788b1678 100644 --- a/stacks/traefik/modules/traefik/main.tf +++ b/stacks/traefik/modules/traefik/main.tf @@ -200,7 +200,7 @@ resource "helm_release" "traefik" { # Explicit entrypoint timeouts to bound tail latency from slow clients "--entryPoints.websecure.transport.respondingTimeouts.readTimeout=60s", "--entryPoints.websecure.transport.respondingTimeouts.writeTimeout=60s", - "--entryPoints.websecure.transport.respondingTimeouts.idleTimeout=600s", + "--entryPoints.websecure.transport.respondingTimeouts.idleTimeout=180s", # Use forwarded headers from trusted proxies "--entryPoints.websecure.forwardedHeaders.insecure=false", "--entryPoints.web.forwardedHeaders.insecure=false", diff --git a/stacks/traefik/modules/traefik/middleware.tf b/stacks/traefik/modules/traefik/middleware.tf index 2c8ae8c4..9cfac0a3 100644 --- a/stacks/traefik/modules/traefik/middleware.tf +++ b/stacks/traefik/modules/traefik/middleware.tf @@ -244,8 +244,8 @@ resource "kubernetes_manifest" "middleware_immich_rate_limit" { } spec = { rateLimit = { - average = 1000 - burst = 20000 + average = 500 + burst = 5000 } } } diff --git a/stacks/tuya-bridge/main.tf b/stacks/tuya-bridge/main.tf index 574ed95d..4d87f8aa 100644 --- a/stacks/tuya-bridge/main.tf +++ b/stacks/tuya-bridge/main.tf @@ -118,26 +118,6 @@ resource "kubernetes_deployment" "tuya-bridge" { } } } - liveness_probe { - http_get { - path = "/health" - port = 8080 - } - initial_delay_seconds = 60 - period_seconds = 30 - timeout_seconds = 5 - failure_threshold = 6 - } - readiness_probe { - http_get { - path = "/health" - port = 8080 - } - initial_delay_seconds = 10 - period_seconds = 15 - timeout_seconds = 5 - failure_threshold = 2 - } resources { requests = { cpu = "10m" diff --git a/stacks/vault/main.tf 
b/stacks/vault/main.tf index a47379ff..0b8ef993 100644 --- a/stacks/vault/main.tf +++ b/stacks/vault/main.tf @@ -25,6 +25,22 @@ module "tls_secret" { tls_secret_name = var.tls_secret_name } +# NFS StorageClass pointing to Proxmox host (replaces nfs-truenas for vault) +resource "kubernetes_storage_class" "nfs_proxmox" { + metadata { + name = "nfs-proxmox" + } + storage_provisioner = "nfs.csi.k8s.io" + reclaim_policy = "Retain" + volume_binding_mode = "Immediate" + allow_volume_expansion = true + parameters = { + server = "192.168.1.127" + share = "/srv/nfs" + } + mount_options = ["soft", "actimeo=5", "retrans=3", "timeo=30"] +} + resource "helm_release" "vault" { name = "vault" namespace = kubernetes_namespace.vault.metadata[0].name @@ -56,13 +72,13 @@ resource "helm_release" "vault" { dataStorage = { enabled = true size = "2Gi" - storageClass = "proxmox-lvm-encrypted" # Migrated 2026-04-25 from nfs-proxmox; raft fsync is NFS-hostile (post-mortems/2026-04-22-vault-raft-leader-deadlock.md) + storageClass = "nfs-proxmox" # Proxmox host NFS (was nfs-truenas) } auditStorage = { enabled = true size = "2Gi" - storageClass = "proxmox-lvm-encrypted" # Migrated 2026-04-25 from nfs-proxmox + storageClass = "nfs-proxmox" # Proxmox host NFS (was nfs-truenas) } standalone = { enabled = false } @@ -101,24 +117,6 @@ resource "helm_release" "vault" { } } - # fsGroupChangePolicy=OnRootMismatch skips recursive chown on restart. - # Without this, kubelet walks every file over NFS each restart; during - # 2026-04-22 outage this looped for 10m+ and blocked quorum recovery. - # The other four fields restore the chart defaults — providing pod{} - # replaces them, and missing fsGroup left vault unable to write to - # the freshly-formatted ext4 PVC during the 2026-04-25 migration. 
- statefulSet = { - securityContext = { - pod = { - fsGroupChangePolicy = "OnRootMismatch" - fsGroup = 1000 - runAsGroup = 1000 - runAsUser = 100 - runAsNonRoot = true - } - } - } - # Mount unseal key secret extraVolumes = [{ type = "secret" @@ -538,8 +536,7 @@ resource "vault_database_secret_backend_connection" "postgresql" { # "pg-trading", # Commented out 2026-04-06 - trading-bot disabled "pg-health", "pg-linkwarden", "pg-affine", "pg-woodpecker", "pg-claude-memory", - "pg-terraform-state", "pg-payslip-ingest", "pg-job-hunter", - "pg-wealthfolio-sync", "pg-fire-planner" + "pg-terraform-state", "pg-payslip-ingest", "pg-job-hunter" ] postgresql { @@ -693,22 +690,6 @@ resource "vault_database_secret_backend_static_role" "pg_job_hunter" { rotation_period = 604800 } -resource "vault_database_secret_backend_static_role" "pg_wealthfolio_sync" { - backend = vault_mount.database.path - db_name = vault_database_secret_backend_connection.postgresql.name - name = "pg-wealthfolio-sync" - username = "wealthfolio_sync" - rotation_period = 604800 -} - -resource "vault_database_secret_backend_static_role" "pg_fire_planner" { - backend = vault_mount.database.path - db_name = vault_database_secret_backend_connection.postgresql.name - name = "pg-fire-planner" - username = "fire_planner" - rotation_period = 604800 -} - # ============================================================================= # Kubernetes Secrets Engine — Dynamic K8s Credentials # ============================================================================= diff --git a/stacks/wealthfolio/main.tf b/stacks/wealthfolio/main.tf index df4dca48..a469e9b3 100644 --- a/stacks/wealthfolio/main.tf +++ b/stacks/wealthfolio/main.tf @@ -3,7 +3,6 @@ variable "tls_secret_name" { sensitive = true } variable "nfs_server" { type = string } -variable "postgresql_host" { type = string } resource "kubernetes_namespace" "wealthfolio" { metadata { @@ -46,52 +45,6 @@ resource "kubernetes_manifest" "external_secret" { depends_on = 
[kubernetes_namespace.wealthfolio] } -# DB credentials for the SQLite→PG ETL sidecar. Vault DB engine static role -# `pg-wealthfolio-sync` rotates this every 7 days; ExternalSecret refreshes -# the K8s Secret every 15m so the sidecar always has a valid password. -resource "kubernetes_manifest" "wealthfolio_sync_db_external_secret" { - manifest = { - apiVersion = "external-secrets.io/v1beta1" - kind = "ExternalSecret" - metadata = { - name = "wealthfolio-sync-db-creds" - namespace = "wealthfolio" - } - spec = { - refreshInterval = "15m" - secretStoreRef = { - name = "vault-database" - kind = "ClusterSecretStore" - } - target = { - name = "wealthfolio-sync-db-creds" - template = { - metadata = { - annotations = { - "reloader.stakater.com/match" = "true" - } - } - data = { - PGHOST = var.postgresql_host - PGPORT = "5432" - PGDATABASE = "wealthfolio_sync" - PGUSER = "wealthfolio_sync" - PGPASSWORD = "{{ .password }}" - } - } - } - data = [{ - secretKey = "password" - remoteRef = { - key = "static-creds/pg-wealthfolio-sync" - property = "password" - } - }] - } - } - depends_on = [kubernetes_namespace.wealthfolio] -} - module "tls_secret" { source = "../../modules/kubernetes/setup_tls_secret" namespace = kubernetes_namespace.wealthfolio.metadata[0].name @@ -261,181 +214,6 @@ resource "kubernetes_deployment" "wealthfolio" { limits = { memory = "64Mi" } } } - - # pg-sync sidecar — mirrors a small subset of SQLite into PG every hour - # so Grafana can chart net worth / contributions / growth via the - # `wealthfolio_sync` database. Mounts /data RO; writes to a tmp dir - # for the sqlite3 .backup snapshot to avoid blocking writers. Bootstrap - # DDL runs each iteration (CREATE TABLE IF NOT EXISTS — idempotent). - # Truncate-and-reload pattern: tables are small (~10k DAV rows, ~500 - # activities, 6 accounts), so a full reload each hour is simpler than - # incremental upserts and gives clean cold-start behaviour. 
- container { - name = "pg-sync" - image = "alpine:3.20" - env { - name = "PGHOST" - value_from { - secret_key_ref { - name = "wealthfolio-sync-db-creds" - key = "PGHOST" - } - } - } - env { - name = "PGPORT" - value_from { - secret_key_ref { - name = "wealthfolio-sync-db-creds" - key = "PGPORT" - } - } - } - env { - name = "PGDATABASE" - value_from { - secret_key_ref { - name = "wealthfolio-sync-db-creds" - key = "PGDATABASE" - } - } - } - env { - name = "PGUSER" - value_from { - secret_key_ref { - name = "wealthfolio-sync-db-creds" - key = "PGUSER" - } - } - } - env { - name = "PGPASSWORD" - value_from { - secret_key_ref { - name = "wealthfolio-sync-db-creds" - key = "PGPASSWORD" - } - } - } - command = ["/bin/sh", "-c", <<-EOT - set -eu - apk add --no-cache --quiet sqlite postgresql-client busybox-suid - mkdir -p /etc/crontabs /scripts /tmp/wf-sync - cat >/etc/crontabs/root <<'CRON' - # Hourly: snapshot SQLite, reload PG mirror. - 7 * * * * /scripts/sync.sh >>/proc/1/fd/1 2>&1 - CRON - cat >/scripts/sync.sh <<'SCRIPT' - #!/bin/sh - set -eu - TS=$(date -u +%Y-%m-%dT%H:%M:%SZ) - echo "[$TS] wealthfolio-pg-sync: starting" - - # Bootstrap schema (idempotent). 
- psql -v ON_ERROR_STOP=1 <<'SQL' - CREATE TABLE IF NOT EXISTS accounts ( - id TEXT PRIMARY KEY, - name TEXT, - account_type TEXT, - currency TEXT, - is_active BOOLEAN - ); - CREATE TABLE IF NOT EXISTS daily_account_valuation ( - id TEXT PRIMARY KEY, - account_id TEXT NOT NULL, - valuation_date DATE NOT NULL, - account_currency TEXT, - base_currency TEXT, - fx_rate_to_base NUMERIC, - cash_balance NUMERIC, - investment_market_value NUMERIC, - total_value NUMERIC, - cost_basis NUMERIC, - net_contribution NUMERIC - ); - CREATE INDEX IF NOT EXISTS idx_dav_acct_date ON daily_account_valuation(account_id, valuation_date); - CREATE INDEX IF NOT EXISTS idx_dav_date ON daily_account_valuation(valuation_date); - CREATE TABLE IF NOT EXISTS activities ( - id TEXT PRIMARY KEY, - account_id TEXT, - asset_id TEXT, - activity_type TEXT, - activity_date TIMESTAMPTZ, - quantity NUMERIC, - unit_price NUMERIC, - amount NUMERIC, - fee NUMERIC, - currency TEXT, - fx_rate NUMERIC, - notes TEXT - ); - CREATE INDEX IF NOT EXISTS idx_act_date ON activities(activity_date); - SQL - - # Snapshot SQLite (online backup — non-blocking). - rm -f /tmp/wf-sync/snapshot.db - sqlite3 /data/wealthfolio.db ".backup /tmp/wf-sync/snapshot.db" - - # Dump source rows to TSV. 
- sqlite3 -separator $'\t' /tmp/wf-sync/snapshot.db \ - "SELECT id, name, account_type, currency, is_active FROM accounts;" \ - > /tmp/wf-sync/accounts.tsv - - sqlite3 -separator $'\t' /tmp/wf-sync/snapshot.db <<'SQ' > /tmp/wf-sync/dav.tsv - SELECT id, account_id, valuation_date, account_currency, base_currency, - CAST(fx_rate_to_base AS REAL), - CAST(cash_balance AS REAL), - CAST(investment_market_value AS REAL), - CAST(total_value AS REAL), - CAST(cost_basis AS REAL), - CAST(net_contribution AS REAL) - FROM daily_account_valuation - WHERE account_id != 'TOTAL'; -- synthetic pre-aggregated row; would double-count when summed - SQ - - sqlite3 -separator $'\t' /tmp/wf-sync/snapshot.db <<'SQ' > /tmp/wf-sync/activities.tsv - SELECT id, account_id, asset_id, activity_type, activity_date, - CAST(quantity AS REAL), - CAST(unit_price AS REAL), - CAST(amount AS REAL), - CAST(fee AS REAL), - currency, - CAST(fx_rate AS REAL), - notes - FROM activities WHERE status='POSTED'; - SQ - - # Truncate-and-reload (small tables; simpler than upserts). - psql -v ON_ERROR_STOP=1 <