From ae0c1701ecccc44523dbf3f91f35061d61df1567 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Mon, 18 May 2026 19:26:39 +0000 Subject: [PATCH] security(wave1): W1.2 Vault XFF (applied) + W1.4/W1.5 Kyverno code prep (apply blocked on provider crash) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## W1.2 — Vault audit device + X-Forwarded-For (APPLIED + VERIFIED) - Added `x_forwarded_for_authorized_addrs = "10.10.0.0/16"` to vault listener config. Trust X-Forwarded-For from in-cluster sources (pod CIDR). Without this, every vault audit log entry shows Traefik's pod IP instead of the real client IP — the V7 alert rule (Viktor identity from non-allowlist source IP) needs the real client IP to be meaningful. - Applied via `tg apply -target=helm_release.vault` (vault stack has pre-existing for_each unknown issues unrelated to this change; -target documented in error message itself as the workaround). - Rolling restart of vault-{0,1,2} performed manually (StatefulSet uses OnDelete update strategy, not RollingUpdate). All 3 pods rejoined Raft + auto-unsealed within ~10s each. Verified XFF config visible in pod's /vault/config/extraconfig-from-values.hcl. - The `vault_audit "file"` resource was already in TF at line 287 (writing to /vault/audit/vault-audit.log) — no change needed. ## W1.4 + W1.5 — Kyverno enforce flip (CODE ONLY, apply BLOCKED) - Added shared `local.security_policy_exclude_namespaces` (31 critical namespaces from memory id=1970 + `frigate, kured, default, changedetection` discovered during the live-cluster pre-flight check for privileged/hostNetwork/SYS_ADMIN pods that would be blocked by Enforce). - Flipped 3 security policies Audit → Enforce: deny-privileged-containers, deny-host-namespaces, restrict-sys-admin. failurePolicy=Ignore preserved at chart level. 
- `require-trusted-registries` STAYS in Audit mode pending allowlist tightening (current pattern includes `*/*` which matches anything-with-a-slash, so Enforce would be a no-op for supply chain). Tracked under beads `code-8ywc` W1.5. **Apply blocker**: `tg plan` panics with `terraform-provider-kubernetes_v3.1.0` crash on the kubernetes_manifest resources (`ElementKeyInt(0): can't use tftypes.Object...` — provider schema mismatch on Kyverno CRDs). The crash reproduces on the UNMODIFIED file, so it's a pre-existing provider issue, not caused by these changes. Resolving it requires either upgrading the provider or finding a kubernetes_manifest-compatible workaround. Tracked under `code-8ywc`. ## Wave 1 status after this commit - W1.2: APPLIED + VERIFIED (vault XFF + audit device already in place) - W1.4 + W1.5: code ready, apply blocked on provider crash - W1.1, W1.3, W1.6, W1.7: not started in this session Co-Authored-By: Claude Opus 4.7 --- .../modules/kyverno/security-policies.tf | 53 ++++++++++++++++--- stacks/vault/main.tf | 9 ++++ 2 files changed, 54 insertions(+), 8 deletions(-) diff --git a/stacks/kyverno/modules/kyverno/security-policies.tf b/stacks/kyverno/modules/kyverno/security-policies.tf index 00bcf3a6..45d8b3a3 100644 --- a/stacks/kyverno/modules/kyverno/security-policies.tf +++ b/stacks/kyverno/modules/kyverno/security-policies.tf @@ -1,8 +1,33 @@ # ============================================================================= -# Pod Security Policies (Audit Mode) +# Pod Security Policies # ============================================================================= # Kyverno validate policies for pod security standards. -# All policies start in Audit mode - violations are logged but not blocked. +# Wave 1 (locked 2026-05-18, beads code-8ywc): deny-privileged-containers, +# deny-host-namespaces, restrict-sys-admin flipped from Audit → Enforce with +# a shared 35-namespace exclude list. 
require-trusted-registries STAYS in +# Audit until the allowlist pattern is tightened beyond `*/*` (separate work +# item — current pattern allows everything with a slash, so Enforce would be +# a no-op for supply-chain protection). +# failurePolicy stays Ignore (chart-level) to prevent admission webhook +# failures from cascading. + +# Shared namespace exclude list — 31 critical namespaces from the Keel rollout +# (memory id=1970) + `frigate` (legitimately needs host access for camera RTSP). +locals { + security_policy_exclude_namespaces = [ + "keel", "calico-system", "authentik", "vault", "cnpg-system", "dbaas", + "monitoring", "traefik", "technitium", "mailserver", "kyverno", + "metallb-system", "external-secrets", "proxmox-csi", "nfs-csi", "nvidia", + "kube-system", "cloudflared", "crowdsec", "reverse-proxy", "reloader", + "descheduler", "vpa", "redis", "sealed-secrets", "headscale", "wireguard", + "xray", "infra-maintenance", "metrics-server", "tigera-operator", "frigate", + # Additions discovered during wave 1 enforce flip — these contain workloads + # that legitimately need privileged / hostNetwork / SYS_ADMIN: + "kured", # kured DaemonSet is privileged (manages node reboots) + "default", # etcd backup + defrag CronJobs use hostNetwork + "changedetection", # uses SYS_ADMIN for chromium sandbox + ] +} resource "kubernetes_manifest" "policy_deny_privileged" { manifest = { @@ -18,7 +43,7 @@ resource "kubernetes_manifest" "policy_deny_privileged" { } } spec = { - validationFailureAction = "Audit" + validationFailureAction = "Enforce" background = true rules = [{ name = "deny-privileged" @@ -32,7 +57,7 @@ resource "kubernetes_manifest" "policy_deny_privileged" { exclude = { any = [{ resources = { - namespaces = ["frigate", "nvidia", "monitoring"] + namespaces = local.security_policy_exclude_namespaces } }] } @@ -74,7 +99,7 @@ resource "kubernetes_manifest" "policy_deny_host_namespaces" { } } spec = { - validationFailureAction = "Audit" + validationFailureAction = 
"Enforce" background = true rules = [{ name = "deny-host-namespaces" @@ -88,7 +113,7 @@ resource "kubernetes_manifest" "policy_deny_host_namespaces" { exclude = { any = [{ resources = { - namespaces = ["frigate", "monitoring"] + namespaces = local.security_policy_exclude_namespaces } }] } @@ -123,7 +148,7 @@ resource "kubernetes_manifest" "policy_restrict_capabilities" { } } spec = { - validationFailureAction = "Audit" + validationFailureAction = "Enforce" background = true rules = [{ name = "restrict-sys-admin" @@ -137,7 +162,7 @@ resource "kubernetes_manifest" "policy_restrict_capabilities" { exclude = { any = [{ resources = { - namespaces = ["nvidia", "monitoring"] + namespaces = local.security_policy_exclude_namespaces } }] } @@ -265,6 +290,11 @@ resource "kubernetes_manifest" "policy_require_trusted_registries" { } } spec = { + # NOTE: Stays in Audit mode pending allowlist tightening. The current + # pattern includes `*/*` which matches any image with a registry — flipping + # to Enforce would not actually restrict supply chain. Tightening the + # allowlist to a precise enumeration of in-use registries is tracked + # separately under beads code-8ywc (W1.5 follow-up). validationFailureAction = "Audit" background = true rules = [{ @@ -276,6 +306,13 @@ resource "kubernetes_manifest" "policy_require_trusted_registries" { } }] } + exclude = { + any = [{ + resources = { + namespaces = local.security_policy_exclude_namespaces + } + }] + } validate = { message = "Images must be from trusted registries (docker.io, ghcr.io, quay.io, registry.k8s.io, or local cache)." pattern = { diff --git a/stacks/vault/main.tf b/stacks/vault/main.tf index f222db37..ad0c8d93 100644 --- a/stacks/vault/main.tf +++ b/stacks/vault/main.tf @@ -89,6 +89,15 @@ resource "helm_release" "vault" { tls_disable = 1 address = "[::]:8200" cluster_address = "[::]:8201" + + # Trust X-Forwarded-For from in-cluster sources only (Traefik runs in pod CIDR). 
+ # Without this, every audit log entry shows Traefik's pod IP instead of the + # real client IP — V7 (Viktor identity from non-allowlist source IP) needs the + # real client IP to work. Pod CIDR = 10.10.0.0/16 per kubeadm-config. + # See docs/architecture/security.md "Audit Logging & Anomaly Detection". + x_forwarded_for_authorized_addrs = "10.10.0.0/16" + x_forwarded_for_reject_not_authorized = false + x_forwarded_for_reject_not_present = false } storage "raft" {