2026-03-17 21:42:16 +00:00
|
|
|
# Value applied as the `tier` label on the nfs-csi namespace.
variable "tier" {
  type = string
}
|
|
|
|
|
# NFS server address; passed as the `server` parameter of the nfs-truenas StorageClass.
variable "nfs_server" {
  type = string
}
|
|
|
|
|
|
|
|
|
|
# Namespace that hosts the NFS CSI driver Helm release.
resource "kubernetes_namespace" "nfs_csi" {
  metadata {
    name = "nfs-csi"

    labels = {
      tier = var.tier
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
    # The policy sets the label to "off" so Goldilocks only produces recommendations
    # while Terraform keeps ownership of container resource limits. Without this
    # ignore rule every plan reports the label as pending removal (`-> null`).
    ignore_changes = [
      metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"],
    ]
  }
}
|
|
|
|
|
|
|
|
|
|
# Installs the csi-driver-nfs Helm chart into the nfs-csi namespace.
# The chart's own StorageClass is disabled (storageClass.create = false).
#
# NOTE(review): no chart `version` is set here, so the chart revision is not
# pinned by this configuration — consider pinning once the deployed version
# has been confirmed.
resource "helm_release" "nfs_csi_driver" {
  name       = "csi-driver-nfs"
  repository = "https://raw.githubusercontent.com/kubernetes-csi/csi-driver-nfs/master/charts"
  chart      = "csi-driver-nfs"

  # The namespace is owned by kubernetes_namespace.nfs_csi, so Helm must not create it.
  namespace        = kubernetes_namespace.nfs_csi.metadata[0].name
  create_namespace = false

  atomic  = true
  timeout = 300

  values = [yamlencode({
    controller = {
      replicas = 2

      # NOTE(review): the upstream chart's values appear to name this key
      # `healthPort` — confirm that `httpPort` is honoured by the chart
      # version in use before relying on this override.
      livenessProbe = {
        httpPort = 29653
      }

      # Per-container requests/limits for the controller pod. Only memory is
      # limited; CPU has a request but no limit.
      resources = {
        csiProvisioner = {
          requests = {
            cpu    = "10m"
            memory = "128Mi"
          }
          limits = {
            memory = "128Mi"
          }
        }

        csiResizer = {
          requests = {
            cpu    = "10m"
            memory = "128Mi"
          }
          limits = {
            memory = "128Mi"
          }
        }

        csiSnapshotter = {
          requests = {
            cpu    = "10m"
            memory = "128Mi"
          }
          limits = {
            memory = "128Mi"
          }
        }

        nfs = {
          requests = {
            cpu    = "10m"
            memory = "128Mi"
          }
          limits = {
            memory = "128Mi"
          }
        }

        livenessProbe = {
          requests = {
            cpu    = "10m"
            memory = "64Mi"
          }
          limits = {
            memory = "64Mi"
          }
        }
      }
    }

    node = {
      # Per-container requests/limits for the node pod; same shape as the
      # controller block above.
      resources = {
        nfs = {
          requests = {
            cpu    = "10m"
            memory = "128Mi"
          }
          limits = {
            memory = "128Mi"
          }
        }

        livenessProbe = {
          requests = {
            cpu    = "10m"
            memory = "64Mi"
          }
          limits = {
            memory = "64Mi"
          }
        }

        nodeDriverRegistrar = {
          requests = {
            cpu    = "10m"
            memory = "64Mi"
          }
          limits = {
            memory = "64Mi"
          }
        }
      }
    }

    # Do not let the chart create a StorageClass.
    storageClass = {
      create = false
    }
  })]
}
|
|
|
|
|
|
2026-04-19 16:57:05 +00:00
|
|
|
# Historical name retained for PV compatibility — 48 bound PVs reference
# storageClassName: nfs-truenas. The actual backend is the Proxmox host NFS
# (var.nfs_server = 192.168.1.127) since TrueNAS was decommissioned
# 2026-04-13. SC names are immutable on PVs, so renaming would require
# migrating every PV. Not worth the churn for a cosmetic change.
resource "kubernetes_storage_class" "nfs_truenas" {
  metadata {
    name = "nfs-truenas"
  }

  storage_provisioner = "nfs.csi.k8s.io"
  volume_binding_mode = "Immediate"
  reclaim_policy      = "Retain"

  # Export that the provisioner targets on the NFS server.
  parameters = {
    server = var.nfs_server
    share  = "/srv/nfs"
  }

  # NFS client mount options (see nfs(5)).
  mount_options = [
    "nfsvers=4", # NFS protocol version 4
    "soft",      # fail the request after `retrans` retries instead of retrying indefinitely
    "timeo=30",  # per-request timeout, in tenths of a second
    "retrans=3", # retries before a soft mount reports an error
    "actimeo=5", # attribute cache lifetime, in seconds
  ]
}
|