2026-02-22 13:01:37 +00:00
|
|
|
# Root Terragrunt configuration
|
|
|
|
|
# Provides DRY provider, backend, and variable loading for all stacks.
|
|
|
|
|
|
|
|
|
|
# Each stack gets its own local state file under state/<stack-name>/
|
|
|
|
|
remote_state {
  # State is kept on local disk; no remote backend is configured here.
  backend = "local"

  # One state file per stack: the path is built from the repository root
  # plus the including stack's path relative to this root configuration.
  config = {
    path = "${get_repo_root()}/state/${path_relative_to_include()}/terraform.tfstate"
  }

  # Terragrunt emits the backend definition as backend.tf in the stack.
  # "overwrite_terragrunt" lets it replace a file from an earlier run.
  generate = {
    if_exists = "overwrite_terragrunt"
    path      = "backend.tf"
  }
}
|
|
|
|
|
|
remove SOPS pipeline, deploy ESO + Vault DB/K8s engines
Vault is now the sole source of truth for secrets. SOPS pipeline
removed entirely — auth via `vault login -method=oidc`.
Part A: SOPS removal
- vault/main.tf: delete 990 lines (93 vars + 43 KV write resources),
add self-read data source for OIDC creds from secret/vault
- terragrunt.hcl: remove SOPS var loading, vault_root_token, check_secrets hook
- scripts/tg: remove SOPS decryption, keep -auto-approve logic
- .woodpecker/default.yml: replace SOPS with Vault K8s auth via curl
- Delete secrets.sops.json, .sops.yaml
Part B: External Secrets Operator
- New stack stacks/external-secrets/ with Helm chart + 2 ClusterSecretStores
(vault-kv for KV v2, vault-database for DB engine)
Part C: Database secrets engine (in vault/main.tf)
- MySQL + PostgreSQL connections with static role rotation (24h)
- 6 MySQL roles (speedtest, wrongmove, codimd, nextcloud, shlink, grafana)
- 6 PostgreSQL roles (trading, health, linkwarden, affine, woodpecker, claude_memory)
Part D: Kubernetes secrets engine (in vault/main.tf)
- RBAC for Vault SA to manage K8s tokens
- Roles: dashboard-admin, ci-deployer, openclaw, local-admin
- New scripts/vault-kubeconfig helper for dynamic kubeconfig
K8s auth method with scoped policies for CI, ESO, OpenClaw, Woodpecker sync.
2026-03-15 16:37:38 +00:00
|
|
|
# Load config.tfvars (plaintext, required) + terraform.tfvars (git-crypt encrypted, optional; kept for the migration).
|
|
|
|
|
# Secrets come from Vault KV — authenticate via `vault login -method=oidc`.
|
2026-02-22 13:01:37 +00:00
|
|
|
terraform {
  # Variable files for every Terraform command that accepts variables.
  # config.tfvars must exist; terraform.tfvars is passed only when present.
  extra_arguments "common_vars" {
    commands           = get_terraform_commands_that_need_vars()
    required_var_files = ["${get_repo_root()}/config.tfvars"]
    optional_var_files = ["${get_repo_root()}/terraform.tfvars"]
  }

  # Append "-backup=-" to the state-touching commands listed below so that
  # Terraform does not leave a state backup file behind.
  extra_arguments "no_backup" {
    arguments = ["-backup=-"]
    commands = [
      "apply",
      "plan",
      "destroy",
      "import",
    ]
  }

  # Set the kube_config_path variable to the file named "config" at the
  # repository root for every command that accepts variables.
  extra_arguments "kube_config" {
    commands  = get_terraform_commands_that_need_vars()
    arguments = ["-var", "kube_config_path=${get_repo_root()}/config"]
  }
}
|
|
|
|
|
|
|
|
|
|
# Generate kubernetes + helm providers for K8s stacks.
|
|
|
|
|
# The infra stack overrides this to add the proxmox provider.
|
|
|
|
|
generate "k8s_providers" {
  # Emitted into each stack as providers.tf; a file written by a previous
  # Terragrunt run is replaced.
  if_exists = "overwrite_terragrunt"
  path      = "providers.tf"

  # The heredoc below is the literal Terraform source of the generated file:
  # a version pin for the vault provider, the kube_config_path variable, and
  # the kubernetes, helm and vault provider configurations. The kubernetes
  # and helm providers both read the kubeconfig named by var.kube_config_path.
  contents = <<EOF
terraform {
  required_providers {
    vault = {
      source  = "hashicorp/vault"
      version = "~> 4.0"
    }
  }
}

variable "kube_config_path" {
  type    = string
  default = "~/.kube/config"
}

provider "kubernetes" {
  config_path = var.kube_config_path
}

provider "helm" {
  kubernetes = {
    config_path = var.kube_config_path
  }
}

provider "vault" {
  address          = "https://vault.viktorbarzin.me"
  skip_child_token = true
}
EOF
}
|
[ci skip] Infrastructure hardening: security, monitoring, reliability, maintainability
Phase 1 - Critical Security:
- Netbox: move hardcoded DB/superuser passwords to variables
- MeshCentral: disable public registration, add Authentik auth
- Traefik: disable insecure API dashboard (api.insecure=false)
- Traefik: configure forwarded headers with Cloudflare trusted IPs
Phase 2 - Security Hardening:
- Add security headers middleware (HSTS, X-Frame-Options, nosniff, etc.)
- Add Kyverno pod security policies in audit mode (privileged, host
namespaces, SYS_ADMIN, trusted registries)
- Tighten rate limiting (avg=10, burst=50)
- Add Authentik protection to grampsweb
Phase 3 - Monitoring & Alerting:
- Add critical service alerts (PostgreSQL, MySQL, Redis, Headscale,
Authentik, Loki)
- Increase Loki retention from 7 to 30 days (720h)
- Add predictive PV filling alert (predict_linear)
- Re-enable Hackmd and Privatebin down alerts
Phase 4 - Reliability:
- Add resource requests/limits to Redis, DBaaS, Technitium, Headscale,
Vaultwarden, Uptime Kuma
- Increase Alloy DaemonSet memory to 512Mi/1Gi
Phase 6 - Maintainability:
- Extract duplicated tiers locals to terragrunt.hcl generate block
(removed from 67 stacks)
- Replace hardcoded NFS IP 10.0.10.15 with var.nfs_server (114
instances across 63 files)
- Replace hardcoded Redis/PostgreSQL/MySQL/Ollama/mail host references
with variables across ~35 stacks
- Migrate xray raw ingress resources to ingress_factory modules
2026-02-23 22:05:28 +00:00
|
|
|
|
|
|
|
|
# Generate shared tiers locals for all stacks.
|
|
|
|
|
# Previously duplicated in 67+ stacks; now defined once here.
|
|
|
|
|
generate "tiers" {
  # Emitted into each stack as tiers.tf; a file written by a previous
  # Terragrunt run is replaced.
  if_exists = "overwrite_terragrunt"
  path      = "tiers.tf"

  # The generated file defines local.tiers, a map from a short tier name
  # to its numbered tier label, so stacks can refer to local.tiers.<name>.
  contents = <<EOF
locals {
  tiers = {
    core    = "0-core"
    cluster = "1-cluster"
    gpu     = "2-gpu"
    edge    = "3-edge"
    aux     = "4-aux"
  }
}
EOF
}
|