WHAT LANDED:
- terragrunt.hcl (root): added telmate/proxmox to k8s_providers
required_providers. Other stacks just don't instantiate a provider
block — harmless. Replaces the same-name override trick the infra
stack used to do, which stopped working under Terragrunt v0.77
("Detected generate blocks with the same name").
- stacks/infra/terragrunt.hcl: new generate "proxmox_provider" block
writes proxmox_provider.tf with the provider config; credentials
read from Vault secret/viktor at plan/apply time (no env vars).
- modules/create-vm: new mbps_rd / mbps_wr number variables (default 0
= uncapped), wired into scsi0/scsi1 disk{} blocks as
mbps_r_concurrent / mbps_wr_concurrent. lifecycle.ignore_changes
extended to scsi6..scsi29 (K8s nodes have many CSI-managed slots),
plus scsihw and qemu_os (vary per-VM; non-trivial live changes).
- stacks/infra/main.tf: docker-registry-vm gains mbps_rd=40,
mbps_wr=40 in HCL — already applied live via qm set on 2026-05-26.
WHAT FAILED AND WAS ROLLED BACK:
- Attempted import of 7 VMs (102 devvm, 103 home-assistant, 200
k8s-master, 201 k8s-node1, 202 k8s-node2, 203 k8s-node3, 204
k8s-node4) via import {} blocks. The telmate/proxmox v3.0.2-rc07
provider mangled proxmox-csi PVC slots on apply for vmid 202 and
203: every scsi slot got rewritten from `vm-9999-pvc-<uuid>` to
the boot disk `vm-<vmid>-disk-0`. Restored both .conf files from
the 2026-05-24 nightly PVE config backup at /mnt/backup/pve-config/
etc-pve/nodes/pve/qemu-server/{202,203}.conf — no reboots, no data
loss, K8s CSI reconciled PVC attachments within minutes. Removed
the 7 imports from state via `terraform state rm` and re-encrypted.
Tracked in beads code-xzbl: blocked on bpg/proxmox provider
migration (telmate has the same dynamic-disk defect that bit us on
iSCSI on 2026-04-02; see memory id=539).
LIVE CAPS STILL IN PLACE (qm set, 2026-05-26 ~03:13 UTC):
102 devvm 60/60; 103 home-assistant 40/40; 200 k8s-master 100/60;
201 k8s-node1 150/120; 202 k8s-node2 150/120; 203 k8s-node3 150/120;
204 k8s-node4 150/120; 220 docker-registry 40/40.
(pfSense 101 BSD + Windows10 300 intentionally out of scope.)
PRE-EXISTING DRIFT EXPOSED (NOT NEW):
- HCL declares k8s-master (200) and k8s-node2 (202) but neither was
ever imported into TF state — confirmed against the SOPS-encrypted
state in git (lineage e1cc5bb5, serial 42, last touched 2026-04-06).
This commit leaves both declarations in place but does NOT import
them; that's part of the code-xzbl follow-up.
Closes: code-s9xr
151 lines
4.1 KiB
HCL
151 lines
4.1 KiB
HCL
# Root Terragrunt configuration
|
|
# Provides DRY provider, backend, and variable loading for all stacks.
|
|
|
|
# Two-tier state backend:
|
|
# Tier 0 (bootstrap): local state, SOPS-encrypted in git — must exist before PG is reachable.
|
|
# Tier 1 (everything else): PG backend on CNPG cluster, native pg_advisory_lock.
|
|
# Stack classification used to pick the state backend.
locals {
  # Name of the including stack: its include-relative path with every
  # "stacks/" occurrence removed.
  stack_name = replace(path_relative_to_include(), "stacks/", "")

  # Stacks whose state is kept in the local backend.
  tier0_stacks = [
    "infra",
    "platform",
    "cnpg",
    "vault",
    "dbaas",
    "external-secrets",
  ]

  # True when the including stack is listed in tier0_stacks.
  is_tier0 = contains(local.tier0_stacks, local.stack_name)
}
|
|
|
|
remote_state {
  # Tier 0 stacks use the "local" backend; every other stack uses "pg".
  backend = local.is_tier0 ? "local" : "pg"

  # Local branch: state file under <repo root>/state/<include-relative path>.
  # pg branch: connection string from PG_CONN_STR (empty string when unset),
  # with the stack name as the schema.
  config = local.is_tier0 ? {
    path = "${get_repo_root()}/state/${path_relative_to_include()}/terraform.tfstate"
  } : {
    conn_str    = get_env("PG_CONN_STR", "")
    schema_name = local.stack_name
  }

  # Emit the backend configuration into backend.tf in each stack.
  generate = {
    if_exists = "overwrite_terragrunt"
    path      = "backend.tf"
  }
}
|
|
|
|
# Load config.tfvars (plaintext). Secrets come from Vault KV — authenticate via `vault login -method=oidc`.
|
|
terraform {
  # Every command that accepts variables must load the repo-level config.tfvars.
  extra_arguments "common_vars" {
    required_var_files = ["${get_repo_root()}/config.tfvars"]
    commands           = get_terraform_commands_that_need_vars()
  }

  # Pass -backup=- to the state-touching commands (per the block name, this is
  # meant to suppress state backup files).
  extra_arguments "no_backup" {
    arguments = ["-backup=-"]
    commands  = ["apply", "plan", "destroy", "import"]
  }

  # Point kube_config_path at the "config" file in the repo root.
  extra_arguments "kube_config" {
    arguments = ["-var", "kube_config_path=${get_repo_root()}/config"]
    commands  = get_terraform_commands_that_need_vars()
  }
}
|
|
|
|
# Generate providers.tf for all stacks: required_providers entries for vault,
# cloudflare, authentik, kubectl and proxmox, plus provider configuration for
# kubernetes, helm, vault and kubectl.
# (Stacks that don't use proxmox simply omit any `provider "proxmox" {}` block;
# the required_providers entry is harmless. The pre-2026-05-26 trick of the
# infra stack overriding this block to add proxmox stopped working under
# Terragrunt v0.77 — same-name generate blocks are now forbidden — so proxmox
# is declared globally instead. The `provider "proxmox" {}` config lives in
# stacks/infra/terragrunt.hcl, generated under a different filename so it
# doesn't collide with this providers.tf.)
|
|
# Writes providers.tf into each stack: the shared required_providers block,
# the kube_config_path variable, and the kubernetes / helm / vault / kubectl
# provider configurations.
generate "k8s_providers" {
  if_exists = "overwrite_terragrunt"
  path      = "providers.tf"
  contents  = <<EOF
terraform {
  required_providers {
    vault = {
      source  = "hashicorp/vault"
      version = "~> 4.0"
    }
    cloudflare = {
      source  = "cloudflare/cloudflare"
      version = "~> 4"
    }
    authentik = {
      source  = "goauthentik/authentik"
      version = "~> 2024.10"
    }
    # kubectl (gavinbunney) — workaround for hashicorp/kubernetes
    # `kubernetes_manifest` panics on Kyverno CRDs. See beads code-e2dp.
    # Declared for all stacks but only used where opted-in.
    kubectl = {
      source  = "gavinbunney/kubectl"
      version = "~> 1.14"
    }
    proxmox = {
      source  = "telmate/proxmox"
      version = "3.0.2-rc07"
    }
  }
}

variable "kube_config_path" {
  type    = string
  default = "~/.kube/config"
}

provider "kubernetes" {
  config_path = var.kube_config_path
}

provider "helm" {
  kubernetes = {
    config_path = var.kube_config_path
  }
}

provider "vault" {
  address          = "https://vault.viktorbarzin.me"
  skip_child_token = true
}

provider "kubectl" {
  config_path      = var.kube_config_path
  load_config_file = true
}
EOF
}
|
|
|
|
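# Generate Cloudflare provider config in its own file, separate from
# providers.tf. (The split originally avoided conflicts with stacks that
# overrode providers.tf, e.g. the infra stack; that override no longer exists
# because Terragrunt v0.77 forbids same-name generate blocks.)
# DNS records are created per-service via ingress_factory's dns_type param.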
|
|
# Writes cloudflare_provider.tf into each stack: a Vault KV v2 lookup of
# secret/platform and a cloudflare provider that takes its API key from it.
generate "cloudflare_provider" {
  if_exists = "overwrite_terragrunt"
  path      = "cloudflare_provider.tf"
  contents  = <<EOF
data "vault_kv_secret_v2" "cf_platform" {
  mount = "secret"
  name  = "platform"
}

provider "cloudflare" {
  api_key = data.vault_kv_secret_v2.cf_platform.data["cloudflare_api_key"]
  email   = "vbarzin@gmail.com"
}
EOF
}
|
|
|
|
# Generate shared tiers locals for all stacks.
|
|
# Previously duplicated in 67+ stacks; now defined once here.
|
|
# Writes tiers.tf into each stack, exposing local.tiers as a map from tier
# key (core, cluster, gpu, edge, aux) to its numbered tier label.
generate "tiers" {
  if_exists = "overwrite_terragrunt"
  path      = "tiers.tf"
  contents  = <<EOF
locals {
  tiers = {
    core    = "0-core"
    cluster = "1-cluster"
    gpu     = "2-gpu"
    edge    = "3-edge"
    aux     = "4-aux"
  }
}
EOF
}
|