fix: restore tree dropped by 6d224861; land stem95su gdrive-sync (10m) [ci skip]

6d224861 came from a --no-checkout worktree whose empty index made the
commit drop every file except two. This restores 05b50d2b's full tree and
correctly adds stacks/stem95su/gdrive-sync.tf + the service-catalog stem95su
entry. Forward-only (parent=6d224861, no force-push); [ci skip] since the
live infra was never applied from the broken commit.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-06-09 08:45:33 +00:00
parent 6d224861c4
commit fd0f4a0365
1166 changed files with 358546 additions and 0 deletions

View file

@ -0,0 +1,72 @@
# =============================================================================
# Pod Dependency Init Container Injection
# =============================================================================
# Reads the annotation dependency.kyverno.io/wait-for from pods and injects
# init containers that wait for each listed dependency to be reachable.
#
# Usage:
# annotations:
# dependency.kyverno.io/wait-for: "postgresql.dbaas:5432,redis-master.redis:6379"
#
# Each comma-separated entry becomes a busybox init container that runs
# `nc -z <host> <port>` in a loop until the dependency is reachable.
# Existing init containers are preserved — Kyverno appends to the array.
# ClusterPolicy that, on Pod CREATE, turns the
# dependency.kyverno.io/wait-for annotation into init containers.
#
# - The rule is skipped when the annotation is missing or empty (precondition).
# - The annotation value is split on "," and each "host:port" element yields
#   one busybox init container that loops on `nc -z host port` every 2s.
# - The init container name includes BOTH the host (dots replaced by dashes)
#   and the port. Naming by host alone made two entries for the same host
#   (e.g. "db.ns:5432,db.ns:6432") produce the same container name, so the
#   strategic-merge patch collapsed them into a single container.
# - Entries must be exactly "host:port" with no surrounding whitespace; the
#   element is split on ":" and indexes [0] and [1] are used verbatim.
resource "kubectl_manifest" "inject_dependency_init_containers" {
  yaml_body = yamlencode({
    apiVersion = "kyverno.io/v1"
    kind       = "ClusterPolicy"
    metadata = {
      name = "inject-dependency-init-containers"
      annotations = {
        "policies.kyverno.io/title"       = "Inject Dependency Init Containers"
        "policies.kyverno.io/description" = "Injects wait-for init containers based on dependency.kyverno.io/wait-for pod annotation. Each comma-separated host:port entry becomes a busybox init container that blocks until the dependency is reachable via nc -z."
      }
    }
    spec = {
      rules = [
        {
          name = "wait-for-dependencies"
          match = {
            any = [
              {
                resources = {
                  kinds      = ["Pod"]
                  operations = ["CREATE"]
                }
              }
            ]
          }
          # `|| ''` makes a missing annotation evaluate to the empty string
          # instead of failing variable substitution.
          preconditions = {
            all = [
              {
                key      = "{{ request.object.metadata.annotations.\"dependency.kyverno.io/wait-for\" || '' }}"
                operator = "NotEquals"
                value    = ""
              }
            ]
          }
          mutate = {
            foreach = [
              {
                list = "request.object.metadata.annotations.\"dependency.kyverno.io/wait-for\" | split(@, ',')"
                patchStrategicMerge = {
                  spec = {
                    initContainers = [
                      {
                        # host-with-dashes + "-" + port keeps names unique per entry.
                        name    = "wait-for-{{ element | split(@, ':') | [0] | replace_all(@, '.', '-') }}-{{ element | split(@, ':') | [1] }}"
                        image   = "busybox:1.37"
                        command = ["sh", "-c", "until nc -z {{ element | split(@, ':') | [0] }} {{ element | split(@, ':') | [1] }}; do echo waiting for {{ element }}; sleep 2; done"]
                      }
                    ]
                  }
                }
              }
            ]
          }
        }
      ]
    }
  })

  # The ClusterPolicy CRD is installed by the Kyverno chart; without this
  # edge a fresh apply can try to create the policy before the CRD exists.
  depends_on = [helm_release.kyverno]
}

View file

@ -0,0 +1,289 @@
# =============================================================================
# Keel Auto-Update Annotation Injector
# =============================================================================
# Design: infra/docs/plans/2026-05-16-auto-upgrade-apps-design.md
# Plan: infra/docs/plans/2026-05-16-auto-upgrade-apps-plan.md
#
# Mutate policy that adds keel.sh/* annotations to Deployments,
# StatefulSets and DaemonSets in *opted-in* namespaces. Opt-in is via a
# label on the namespace:
#
# labels = { "keel.sh/enrolled" = "true" }
#
# Phase rollout = label more namespaces. No edit to this file per phase.
#
# Workloads can individually opt out with the label keel.sh/policy=never
# (used by the rollback runbook). The keel namespace itself is always
# excluded (design decision #11 — supervisor must not auto-update).
# ClusterPolicy "inject-keel-annotations": for Deployments / StatefulSets /
# DaemonSets in namespaces labeled keel.sh/enrolled=true (minus the excluded
# namespaces and workloads labeled keel.sh/policy=never), add-if-missing the
# keel.sh/policy=patch, keel.sh/trigger=poll and keel.sh/pollSchedule
# annotations, and set keel.sh/match-tag to null to strip it.
# NOTE: the comments inside this block are a chronological log; older entries
# describe defaults that were later replaced. The values actually applied are
# the ones in `namespaces = [...]` and in the `annotations = {...}` map.
resource "kubectl_manifest" "policy_inject_keel_annotations" {
yaml_body = yamlencode({
apiVersion = "kyverno.io/v1"
kind = "ClusterPolicy"
metadata = {
name = "inject-keel-annotations"
annotations = {
"policies.kyverno.io/title" = "Inject Keel Auto-Update Annotations"
"policies.kyverno.io/category" = "Automation"
"policies.kyverno.io/severity" = "low"
"policies.kyverno.io/description" = "Adds keel.sh/policy: patch + trigger: poll + pollSchedule annotations to workloads in namespaces labeled keel.sh/enrolled=true, and ACTIVELY STRIPS the legacy keel.sh/match-tag annotation (proven unreliable on 2026-05-26 — it let Keel rewrite tag strings / cross-assign images). Phase rollout per docs/plans/2026-05-16-auto-upgrade-apps-{design,plan}.md."
}
}
spec = {
# Retroactively mutate workloads that existed BEFORE their namespace
# got the keel.sh/enrolled=true label. Without this, Kyverno only
# fires on admission events, so old workloads stay unannotated and
# Keel doesn't watch them. With this flag, Kyverno's BackgroundScan
# controller applies the mutate on existing matching resources when
# the policy is created or updated.
mutateExistingOnPolicyUpdate = true
background = true
rules = [{
name = "add-keel-annotations"
match = {
any = [{
resources = {
kinds = ["Deployment", "StatefulSet", "DaemonSet"]
namespaceSelector = {
matchLabels = {
"keel.sh/enrolled" = "true"
}
}
}
}]
}
exclude = {
any = [
{
resources = {
# [HISTORICAL — original exclusion rationale; several of these
# namespaces were later removed from the list, see the
# 2026-05-17 note below. The `namespaces` list at the end of
# this object is the authoritative current set.]
# Namespaces that must NEVER be auto-updated by Keel.
# Each has a domain-aware upgrade flow (operator, Helm chart
# version bump, schema migration, etc.) that Keel would fight.
#
# - keel: supervisor self-update (decision #11)
# - calico-system: tigera-operator owns Installation CR
# - authentik: 2026-05-17 incident — minor bump 2026.2.2 → 2026.2.3
# broke pgbouncer connections; rolled back manually
# - vault, cnpg-system, dbaas: state-coupled with TF backend
# - monitoring: kube-prometheus-stack multi-component coordination
# - traefik, metallb-system, technitium: networking critical path
# - kyverno, external-secrets, sealed-secrets, reloader,
# descheduler, vpa, kube-system: cluster-level operators
# - proxmox-csi, nfs-csi, nvidia, tigera-operator: hardware/CNI
# coordination
# - cloudflared, headscale, wireguard, xray: VPN/tunnel critical
# - infra-maintenance: cluster utilities
#
# 2026-05-17 ENROLLMENT EXPANSION (final round): removed an
# additional 9 namespaces from the exclude list per explicit
# user decision (auto-updates now allowed in authentik,
# kyverno, metallb-system, external-secrets, proxmox-csi,
# nfs-csi, vpa, sealed-secrets, infra-maintenance), plus
# aiostreams + woodpecker which were unenrolled by namespace
# label only. The `force + match-tag` pairing limits each
# workload to digest-only watches under the deployment's
# CURRENT tag string — no tag-switching, just rolls on
# upstream digest changes for that pinned tag.
# (The `force + match-tag` default referenced here was later
# replaced by `patch`; see the annotations map of this rule.)
#
# Risks to monitor (worth catching regressions on):
# - kyverno: cluster admission engine. `forceFailurePolicyIgnore`
# keeps the cluster admitting pods if Kyverno is down, and
# the admission controller runs 2 replicas, so a bad-digest
# roll can be recovered from by deleting the bad pod.
# - nfs-csi + proxmox-csi: CSI plugins. We pinned the helm
# chart versions today (commit 128cfbbc for nfs-csi); Keel
# tracks the image's digest under the CURRENT tag — if
# upstream re-pushes a patch under the same tag, Keel rolls.
# - external-secrets + sealed-secrets: cluster bootstrappers.
# Multi-replica + tightly-versioned upstream.
# - metallb-system: networking critical path. Speaker is a
# DaemonSet, controller has 1 replica — a bad roll can
# briefly flap LB IPs.
# - authentik: 2026-05-17 incident bit us when minor bump
# 2026.2.2 → 2026.2.3 broke pgbouncer connections. With
# match-tag=true, digest changes under the same tag string
# are rare (upstream stable patch repushes are uncommon).
# If they happen we get rolled; restore via helm rollback.
#
# Remaining exclusions (7) are irreducible: keel itself,
# calico-system + tigera-operator (operator-managed),
# cnpg-system + dbaas (state-coupled), nvidia (pinned to
# 570.195.03 until NVIDIA ships ubuntu26.04 images per
# code-8vr0), kube-system (k8s built-ins).
#
# 2026-05-29: ADDED postiz (8th entry). Two Keel failure modes,
# both unfixable while postiz stays enrolled:
# 1. Bundled redis StatefulSets run docker.io/bitnamilegacy/
# redis (the Broadcom archive repo). Keel hourly resolves
# newer patch tags (7.4.0 → 7.4.1/7.4.2) and tries to roll,
# but require-trusted-registries (security-policies.tf)
# denies bitnamilegacy/* (only bitnami/* is allowlisted).
# Endless deny → retry → Slack-ping loop.
# 2. Keel bumped postiz-app v2.21.7 → v2.21.8 (2026-05-26); the
# surge pod can't schedule under the 3Gi tier-4-aux quota,
# wedging the rollout for 3 days (rolled back to v2.21.7).
# postiz Terraform state is heavily drifted (~2/30 resources
# tracked — memory id=2798/2840), so per-workload opt-out can't
# be applied from the postiz stack. Namespace exclude here
# (clean kyverno state) is the reliable guard. Workloads also
# carry keel.sh/policy=never (annotation+label) set via kubectl
# since the postiz stack can't apply.
namespaces = [
"keel",
"calico-system",
"cnpg-system",
"dbaas",
"nvidia",
"kube-system",
"tigera-operator",
"postiz",
]
}
},
{
# Per-workload opt-out: anything LABELED keel.sh/policy=never.
resources = {
selector = {
matchLabels = {
"keel.sh/policy" = "never"
}
}
}
},
]
}
mutate = {
# Required when mutateExistingOnPolicyUpdate=true — tells the
# background controller which existing resources to mutate.
targets = [
{ apiVersion = "apps/v1", kind = "Deployment" },
{ apiVersion = "apps/v1", kind = "StatefulSet" },
{ apiVersion = "apps/v1", kind = "DaemonSet" },
]
patchStrategicMerge = {
metadata = {
annotations = {
# [HISTORICAL — superseded on 2026-05-26; kept for context. The
# current default is `patch`, see the 2026-05-26 note further
# down and the values at the end of this map.]
# DEFAULT IS `force` + `match-tag: true` — the safe-force
# pairing learned from the 2026-05-16 :17 incident.
#
# How safe-force works:
# - `force` alone polls the registry and grabs the NEWEST
# tag (any tag), which is what downgraded claude-memory
# from :71b32438 → :17 (numeric "17" sorted higher than
# hex SHA). UNSAFE on its own.
# - `match-tag: "true"` constrains `force` to watch ONLY
# the deployment's CURRENT tag string for DIGEST changes.
# Keel never rewrites the tag — it just rolls the pod
# when the digest behind that tag changes. This is the
# correct primitive for `:latest` (and `:major`-style
# floating tags).
#
# Effect per tag type:
# - `:latest` / `:nightly` / `:v1` (mutable): Keel rolls
# whenever upstream pushes a new digest under that tag.
# This is the auto-update behaviour the design wants.
# - `:1.2.3` / `:71b32438` (immutable/content-addressed):
# digest never changes, so Keel does nothing (pinned).
# Safe-by-default for SHA-pinned workloads.
#
# `+(...)` is anchor-preserve (add only if missing). We DROP
# `+()` on `policy` and `match-tag` so an apply migrates
# existing workloads from the old `patch` default to the new
# `force + match-tag` pair. Annotation-only changes do NOT
# restart pods; future digest changes do.
# [END HISTORICAL]
#
# Per-workload overrides (set via kubectl/Terraform):
# "keel.sh/policy" = "never" — opt out (set the LABEL too
# to bypass this mutation)
# Per-namespace opt-out:
# Remove the `keel.sh/enrolled=true` namespace label.
# 2026-05-26: switched default from `force + match-tag=true`
# to `patch` after the 2026-05-26 incident proved match-tag
# does NOT reliably constrain Keel — tag strings got rewritten
# (uptime-kuma :2 → :1, n8n :1.80.5 → :0.1.2, dolt-workbench
# :0.3.73 → :0.1.0, wealthfolio :3.2.1 → :2.0 → :3.2 truncated).
#
# `patch` is semver-parser-bounded:
# - Only patch bumps within current major.minor
# (e.g. 1.2.3 → 1.2.4; never 1.3.x or 2.x).
# - Non-semver tags (`:latest`, `:v4`, `:2`, SHA, `:nightly`)
# are IGNORED entirely — Keel does nothing for them.
# - No more string-comparison surprises.
#
# `match-tag` annotation dropped — it was only meaningful as
# the (failed) safety net under `force`. Irrelevant under
# semver-bounded policies.
#
# `+(...)` anchor = "add only if missing". With the anchor,
# this policy ONLY sets defaults on new workloads — existing
# per-workload overrides (set via TF or kubectl annotate)
# are preserved across policy updates. This was DROPPED for
# one apply on 2026-05-26 to migrate the 151 stale `force`
# annotations to `patch`, then re-added in the same session
# after observing that the label-based exclude rule above
# doesn't reliably filter mutateExistingOnPolicyUpdate scans
# (22 workloads with LABEL keel.sh/policy=never still got
# their ANNOTATION rewritten and had to be repatched). Keep
# the anchor unless you genuinely want a cluster-wide flip.
#
# To override per workload, set the ANNOTATION directly:
# - keel.sh/policy=never (Keel won't touch)
# - keel.sh/policy=minor (wider semver bumps, still bounded)
# - keel.sh/policy=major (any semver bump)
# The corresponding LABEL keel.sh/policy=never is for the
# exclude rule above (defense-in-depth against future mutations).
"+(keel.sh/policy)" = "patch"
"+(keel.sh/trigger)" = "poll"
"+(keel.sh/pollSchedule)" = "@every 1h"
# ACTIVELY STRIP the legacy match-tag annotation. The
# 2026-05-26 migration flipped the default policy force → patch
# and DROPPED match-tag from this patch — but Kyverno's
# add-only mutate can't remove an annotation that's no longer
# listed, so ~194 pre-migration workloads kept a stale
# keel.sh/match-tag=true. That flag let Keel cross-assign
# images across containers in multi-image pods (the `blog`
# deployment had its nginx ↔ nginx-exporter images swapped,
# site down 2026-05-26 → 2026-06-01; the nginx blog image
# received the exporter's `-nginx.scrape-uri` arg and
# CrashLoopBackOff'd). Setting the key to null strips it at
# ADMISSION on every enrolled workload create/update
# (strategic-merge removal; no-op if absent; annotation-only →
# NO pod restart). Deliberately NO `+(...)` anchor — removal
# must be unconditional. NOTE: mutateExistingOnPolicyUpdate did
# NOT regenerate UpdateRequests for this removal-only change
# (Kyverno re-mutates existing resources for add/set, not
# deletions), so the 194 pre-existing workloads were swept once
# via `kubectl annotate <kind>/<name> -n <ns> keel.sh/match-tag-`
# on 2026-06-01. The policy keeps it gone (never re-added) and
# strips it from any future workload.
# See post-mortems/2026-06-01-keel-match-tag-image-swap.md.
"keel.sh/match-tag" = null
}
}
}
}
}]
}
})
# The ClusterPolicy CRD comes from the Kyverno Helm release.
depends_on = [helm_release.kyverno]
}
# Grant the Kyverno background-controller SA permission to mutate
# Deployments / StatefulSets / DaemonSets — required for the policy
# above (mutateExistingOnPolicyUpdate=true + mutate.targets). Kyverno's
# `kyverno:background-controller` ClusterRole aggregates roles labeled
# `rbac.kyverno.io/aggregate-to-background-controller: "true"`.
# ClusterRole carrying the aggregate-to-background-controller label, giving
# read + update/patch access to the three apps/v1 workload kinds that the
# inject-keel-annotations policy targets.
resource "kubernetes_cluster_role" "keel_mutate_existing" {
  depends_on = [helm_release.kyverno]

  metadata {
    name = "kyverno:background-controller:keel-mutate-existing"
    labels = {
      "rbac.kyverno.io/aggregate-to-background-controller" = "true"
    }
  }

  rule {
    verbs      = ["get", "list", "watch", "update", "patch"]
    api_groups = ["apps"]
    resources  = ["deployments", "statefulsets", "daemonsets"]
  }
}

View file

@ -0,0 +1,226 @@
# Namespace that hosts the Kyverno release. It is enrolled for Keel
# auto-updates and opted out of Istio sidecar injection via its labels.
resource "kubernetes_namespace" "kyverno" {
  lifecycle {
    # KYVERNO_LIFECYCLE_V1: the goldilocks-vpa-auto-mode ClusterPolicy stamps
    # this label on every namespace, so Terraform must not treat it as drift.
    ignore_changes = [metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"]]
  }

  metadata {
    name = "kyverno"
    labels = {
      "istio-injection"  = "disabled"
      "keel.sh/enrolled" = "true"
    }
  }
}
# Kyverno Helm release (chart 3.6.1) installed into the namespace defined by
# kubernetes_namespace.kyverno. Chart values are built with yamlencode; the
# per-controller sections only override resources, plus replica/rollout,
# probe and spread settings for the admission controller.
resource "helm_release" "kyverno" {
  name       = "kyverno"
  repository = "https://kyverno.github.io/kyverno/"
  chart      = "kyverno"
  version    = "3.6.1"

  namespace        = kubernetes_namespace.kyverno.metadata[0].name
  create_namespace = false
  atomic           = true

  values = [yamlencode({
    features = {
      # If Kyverno is unavailable, let pod creation proceed without
      # mutation/validation instead of blocking every admission cluster-wide.
      forceFailurePolicyIgnore = {
        enabled = true
      }
      policyReports = {
        enabled = false
      }
    }

    admissionController = {
      replicas = 2
      # maxSurge 0 / maxUnavailable 1: replace one replica at a time without
      # ever scheduling a third pod.
      updateStrategy = {
        type = "RollingUpdate"
        rollingUpdate = {
          maxSurge       = 0
          maxUnavailable = 1
        }
      }
      container = {
        resources = {
          requests = {
            cpu    = "100m"
            memory = "256Mi"
          }
          limits = {
            memory = "256Mi"
          }
        }
      }
      # Tolerant liveness probe: API server slowness should not get the pod
      # killed (4 failures x 30s period before a restart).
      livenessProbe = {
        httpGet = {
          path   = "/health/liveness"
          port   = 9443
          scheme = "HTTPS"
        }
        initialDelaySeconds = 15
        periodSeconds       = 30
        timeoutSeconds      = 5
        failureThreshold    = 4
        successThreshold    = 1
      }
      # Keep the replicas on different nodes for HA.
      topologySpreadConstraints = [
        {
          maxSkew           = 1
          topologyKey       = "kubernetes.io/hostname"
          whenUnsatisfiable = "DoNotSchedule"
          labelSelector = {
            matchLabels = {
              "app.kubernetes.io/component" = "admission-controller"
              "app.kubernetes.io/instance"  = "kyverno"
            }
          }
        }
      ]
    }

    backgroundController = {
      resources = {
        requests = {
          cpu    = "100m"
          memory = "256Mi"
        }
        # Limit raised 2026-05-16 from 384Mi to 2Gi: the controller was
        # OOMKilled while processing 176 UpdateRequests for the
        # inject-keel-annotations mutate-existing scan. With
        # mutateExistingOnPolicyUpdate=true the initial bulk scan needs
        # significantly more memory.
        limits = {
          memory = "2Gi"
        }
      }
    }

    cleanupController = {
      resources = {
        requests = {
          cpu    = "100m"
          memory = "192Mi"
        }
        limits = {
          memory = "192Mi"
        }
      }
    }

    reportsController = {
      resources = {
        requests = {
          cpu    = "100m"
          memory = "384Mi"
        }
        limits = {
          memory = "512Mi"
        }
      }
    }
  })]
}
# To unlabel all:
# kubectl label deployment,statefulset,daemonset --all-namespaces -l tier tier-
#
# Uses namespaceSelector to match tiers — no API call needed.
# One rule per tier so Kyverno resolves the tier value from its informer cache.
# ClusterPolicy "sync-tier-label-from-namespace".
#
# Generates one mutate rule per entry of local.governance_tiers. Each rule
# matches Deployments / StatefulSets / DaemonSets whose namespace carries the
# label tier=<tier> and adds the same `tier` label to the workload. The
# `+(tier)` anchor means the label is only added when the workload does not
# already have one. kube-system, metallb-system and n8n are excluded.
resource "kubectl_manifest" "mutate_tier_from_namespace" {
  yaml_body = yamlencode({
    apiVersion = "kyverno.io/v1"
    kind       = "ClusterPolicy"
    metadata = {
      name = "sync-tier-label-from-namespace"
    }
    spec = {
      rules = [for tier in local.governance_tiers : {
        name = "sync-tier-${tier}"
        match = {
          any = [
            {
              resources = {
                kinds = ["Deployment", "StatefulSet", "DaemonSet"]
                namespaceSelector = {
                  matchLabels = {
                    tier = tier
                  }
                }
              }
            }
          ]
        }
        exclude = {
          any = [
            {
              resources = {
                namespaces = ["kube-system", "metallb-system", "n8n"]
              }
            }
          ]
        }
        mutate = {
          patchStrategicMerge = {
            metadata = {
              labels = {
                "+(tier)" = tier
              }
            }
          }
        }
      }]
    }
  })

  # The ClusterPolicy CRD is installed by helm_release.kyverno. Without this
  # explicit edge nothing orders the two resources, so a fresh apply can
  # attempt to create the policy before the CRD is registered.
  depends_on = [helm_release.kyverno]
}
# resource "kubectl_manifest" "enforce_pod_tier_label" {
# manifest = {
# apiVersion = "kyverno.io/v1"
# kind = "ClusterPolicy"
# metadata = {
# name = "enforce-pod-tier-label"
# annotations = {
# "policies.kyverno.io/description" = "Rejects any pod that does not have a tier label."
# }
# }
# spec = {
# # 'Enforce' blocks the creation. 'Audit' just reports it.
# validationFailureAction = "Enforce"
# background = true
# rules = [
# {
# name = "check-for-tier-label"
# match = {
# any = [
# {
# resources = {
# kinds = ["Pod"]
# }
# }
# ]
# }
# validate = {
# message = "The label 'tier' is required for all pods in this cluster."
# pattern = {
# metadata = {
# labels = {
# "tier" = "?*" # The "?*" syntax means the value must not be empty
# }
# }
# }
# }
# }
# ]
# }
# }
# }

View file

@ -0,0 +1,135 @@
# =============================================================================
# Private Docker Registry Credentials — Auto-sync to all namespaces
# =============================================================================
# Source secret in kyverno namespace, cloned by ClusterPolicy into every NS.
# Pods use imagePullSecrets: [{name: registry-credentials}] to pull from
# forgejo.viktorbarzin.me (the only registry this secret holds auth for; the
# old registry.viktorbarzin.me / 10.0.20.10:5050 entries were removed).
# Reads the KV v2 secret "viktor" from the "secret" mount in Vault.
data "vault_kv_secret_v2" "viktor" {
  name  = "viktor"
  mount = "secret"
}
# Source dockerconfigjson secret in the kyverno namespace. It holds a single
# auth entry, for forgejo.viktorbarzin.me, using the "cluster-puller" user and
# the forgejo_pull_token key of the Vault secret.
resource "kubernetes_secret" "registry_credentials" {
  type = "kubernetes.io/dockerconfigjson"

  metadata {
    name      = "registry-credentials"
    namespace = kubernetes_namespace.kyverno.metadata[0].name
  }

  data = {
    ".dockerconfigjson" = jsonencode({
      auths = {
        # Phase 4 of forgejo-registry-consolidation (2026-05-07): registry-
        # private was decommissioned. The old auths entries
        # (registry.viktorbarzin.me, registry.viktorbarzin.me:5050,
        # 10.0.20.10:5050) were removed to prevent silent fallback. A pod that
        # still references the old hostname now fails visibly with missing
        # auth instead of silently pulling potentially-stale blobs.
        "forgejo.viktorbarzin.me" = {
          # try(..., "") falls back to an empty token when the Vault key
          # cannot be read, so the secret is still rendered in that case.
          auth = base64encode("cluster-puller:${try(data.vault_kv_secret_v2.viktor.data["forgejo_pull_token"], "")}")
        }
      }
    })
  }
}
# Grant Kyverno controllers permission to manage Secrets (needed for generate clone rules)
# ClusterRole with full read/write access to core-group Secrets. It is bound
# to Kyverno service accounts by the kyverno_*_secret_manager bindings so the
# generate/clone rules can copy secrets between namespaces.
resource "kubernetes_cluster_role" "kyverno_secret_manager" {
  metadata {
    name = "kyverno:secret-manager"
    labels = {
      "app.kubernetes.io/instance" = "kyverno"
    }
  }

  rule {
    verbs      = ["get", "list", "watch", "create", "update", "patch", "delete"]
    api_groups = [""]
    resources  = ["secrets"]
  }
}
# Binds the kyverno:secret-manager ClusterRole to the
# kyverno-admission-controller service account in the kyverno namespace.
resource "kubernetes_cluster_role_binding" "kyverno_admission_secret_manager" {
  metadata {
    name = "kyverno:admission-controller:secret-manager"
  }

  subject {
    kind      = "ServiceAccount"
    namespace = "kyverno"
    name      = "kyverno-admission-controller"
  }

  role_ref {
    kind      = "ClusterRole"
    api_group = "rbac.authorization.k8s.io"
    name      = kubernetes_cluster_role.kyverno_secret_manager.metadata[0].name
  }
}
# Binds the kyverno:secret-manager ClusterRole to the
# kyverno-background-controller service account in the kyverno namespace.
resource "kubernetes_cluster_role_binding" "kyverno_background_secret_manager" {
  metadata {
    name = "kyverno:background-controller:secret-manager"
  }

  subject {
    kind      = "ServiceAccount"
    namespace = "kyverno"
    name      = "kyverno-background-controller"
  }

  role_ref {
    kind      = "ClusterRole"
    api_group = "rbac.authorization.k8s.io"
    name      = kubernetes_cluster_role.kyverno_secret_manager.metadata[0].name
  }
}
# ClusterPolicy "sync-registry-credentials": a generate rule triggered by
# Namespace resources (except kube-system, kube-public, kube-node-lease) that
# clones kyverno/registry-credentials into the triggering namespace under the
# same name, with synchronize enabled.
resource "kubectl_manifest" "sync_registry_credentials" {
  # The CRD, the source secret and the Secret RBAC must all exist first.
  depends_on = [
    helm_release.kyverno,
    kubernetes_secret.registry_credentials,
    kubernetes_cluster_role_binding.kyverno_admission_secret_manager,
    kubernetes_cluster_role_binding.kyverno_background_secret_manager,
  ]

  yaml_body = yamlencode({
    apiVersion = "kyverno.io/v1"
    kind       = "ClusterPolicy"
    metadata = {
      name = "sync-registry-credentials"
    }
    spec = {
      rules = [
        {
          name = "sync-registry-secret"
          match = {
            any = [
              { resources = { kinds = ["Namespace"] } },
            ]
          }
          exclude = {
            any = [
              { resources = { namespaces = ["kube-system", "kube-public", "kube-node-lease"] } },
            ]
          }
          generate = {
            # Target: Secret "registry-credentials" in the namespace that
            # triggered the rule.
            apiVersion  = "v1"
            kind        = "Secret"
            name        = "registry-credentials"
            namespace   = "{{request.object.metadata.name}}"
            synchronize = true
            # Source of the clone.
            clone = {
              name      = "registry-credentials"
              namespace = "kyverno"
            }
          }
        },
      ]
    }
  })
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,373 @@
# =============================================================================
# Pod Security Policies
# =============================================================================
# Kyverno validate policies for pod security standards.
# Wave 1 (locked 2026-05-18, beads code-8ywc): deny-privileged-containers,
# deny-host-namespaces, restrict-sys-admin flipped from Audit → Enforce with
# a shared 32-namespace exclude list. require-trusted-registries followed on
# 2026-05-19 — also Enforce now, with an explicit registry allowlist (the
# `*/*` catch-all was removed so unknown registries fail closed at admission).
# To allow a new image source, add it to policy_require_trusted_registries below.
# failurePolicy stays Ignore (chart-level) to prevent admission webhook
# failures from cascading.
# Shared namespace exclude list — 31 critical namespaces from the Keel rollout
# (memory id=1970) + `frigate` (legitimately needs host access for camera RTSP).
locals {
  # Namespaces exempted from the pod-security ClusterPolicies in this file
  # (every rule that sets exclude.namespaces to this local).
  security_policy_exclude_namespaces = [
    "keel",
    "calico-system",
    "authentik",
    "vault",
    "cnpg-system",
    "dbaas",
    "monitoring",
    "traefik",
    "technitium",
    "mailserver",
    "kyverno",
    "metallb-system",
    "external-secrets",
    "proxmox-csi",
    "nfs-csi",
    "nvidia",
    "kube-system",
    "cloudflared",
    "crowdsec",
    "reverse-proxy",
    "reloader",
    "descheduler",
    "vpa",
    "redis",
    "sealed-secrets",
    "headscale",
    "wireguard",
    "xray",
    "infra-maintenance",
    "metrics-server",
    "tigera-operator",
    "frigate",

    # Added during the wave 1 enforce flip: these namespaces contain
    # workloads that legitimately need privileged / hostNetwork / SYS_ADMIN.
    "kured",           # kured DaemonSet is privileged (manages node reboots)
    "default",         # etcd backup + defrag CronJobs use hostNetwork
    "changedetection", # uses SYS_ADMIN for chromium sandbox
    "woodpecker",      # CI pipeline pods (wp-*) run privileged docker builds
  ]
}
# ClusterPolicy "deny-privileged-containers" (Enforce).
#
# Rejects Pods outside local.security_policy_exclude_namespaces when any
# container sets securityContext.privileged to something other than false.
# The `=(...)` anchors make each field optional: it is only checked when
# present. The pattern covers containers, initContainers and
# ephemeralContainers; ephemeral (debug) containers were previously
# unchecked, which left a way to run a privileged container in a Pod that
# had otherwise passed the policy.
resource "kubectl_manifest" "policy_deny_privileged" {
  yaml_body = yamlencode({
    apiVersion = "kyverno.io/v1"
    kind       = "ClusterPolicy"
    metadata = {
      name = "deny-privileged-containers"
      annotations = {
        "policies.kyverno.io/title"       = "Deny Privileged Containers"
        "policies.kyverno.io/category"    = "Pod Security"
        "policies.kyverno.io/severity"    = "high"
        "policies.kyverno.io/description" = "Privileged containers have full host access. Deny unless explicitly exempted."
      }
    }
    spec = {
      validationFailureAction = "Enforce"
      background              = true
      rules = [{
        name = "deny-privileged"
        match = {
          any = [{
            resources = {
              kinds = ["Pod"]
            }
          }]
        }
        exclude = {
          any = [{
            resources = {
              namespaces = local.security_policy_exclude_namespaces
            }
          }]
        }
        validate = {
          message = "Privileged containers are not allowed. Use specific capabilities instead."
          pattern = {
            spec = {
              containers = [{
                "=(securityContext)" = {
                  "=(privileged)" = false
                }
              }]
              "=(initContainers)" = [{
                "=(securityContext)" = {
                  "=(privileged)" = false
                }
              }]
              # Same check for debug containers added to a running Pod.
              "=(ephemeralContainers)" = [{
                "=(securityContext)" = {
                  "=(privileged)" = false
                }
              }]
            }
          }
        }
      }]
    }
  })
  depends_on = [helm_release.kyverno]
}
# ClusterPolicy "deny-host-namespaces" (Enforce): for Pods outside
# local.security_policy_exclude_namespaces, hostNetwork / hostPID / hostIPC
# must be false whenever they are set (`=(...)` = check only if present).
resource "kubectl_manifest" "policy_deny_host_namespaces" {
  depends_on = [helm_release.kyverno]

  yaml_body = yamlencode({
    apiVersion = "kyverno.io/v1"
    kind       = "ClusterPolicy"
    metadata = {
      name = "deny-host-namespaces"
      annotations = {
        "policies.kyverno.io/category"    = "Pod Security"
        "policies.kyverno.io/description" = "Sharing host namespaces enables container escapes. Deny hostNetwork, hostPID, hostIPC."
        "policies.kyverno.io/severity"    = "high"
        "policies.kyverno.io/title"       = "Deny Host Namespaces"
      }
    }
    spec = {
      background              = true
      validationFailureAction = "Enforce"
      rules = [
        {
          name = "deny-host-namespaces"
          match = {
            any = [
              { resources = { kinds = ["Pod"] } },
            ]
          }
          exclude = {
            any = [
              { resources = { namespaces = local.security_policy_exclude_namespaces } },
            ]
          }
          validate = {
            message = "Host namespaces (hostNetwork, hostPID, hostIPC) are not allowed."
            pattern = {
              spec = {
                "=(hostIPC)"     = false
                "=(hostNetwork)" = false
                "=(hostPID)"     = false
              }
            }
          }
        },
      ]
    }
  })
}
# ClusterPolicy "restrict-sys-admin" (Enforce).
#
# Denies Pods outside local.security_policy_exclude_namespaces when SYS_ADMIN
# appears in securityContext.capabilities.add of ANY container. One deny
# condition is evaluated per container list (containers, initContainers,
# ephemeralContainers); the rule denies if any of them matches. Previously
# only spec.containers was inspected, so an init or ephemeral container could
# add SYS_ADMIN unnoticed.
#
# Each key ends in "|| `[]`" so that a Pod without that container list, or
# without any capabilities.add, evaluates to an empty list rather than null.
resource "kubectl_manifest" "policy_restrict_capabilities" {
  yaml_body = yamlencode({
    apiVersion = "kyverno.io/v1"
    kind       = "ClusterPolicy"
    metadata = {
      name = "restrict-sys-admin"
      annotations = {
        "policies.kyverno.io/title"       = "Restrict SYS_ADMIN Capability"
        "policies.kyverno.io/category"    = "Pod Security"
        "policies.kyverno.io/severity"    = "high"
        "policies.kyverno.io/description" = "SYS_ADMIN is nearly equivalent to root. Restrict to explicitly exempted namespaces."
      }
    }
    spec = {
      validationFailureAction = "Enforce"
      background              = true
      rules = [{
        name = "restrict-sys-admin"
        match = {
          any = [{
            resources = {
              kinds = ["Pod"]
            }
          }]
        }
        exclude = {
          any = [{
            resources = {
              namespaces = local.security_policy_exclude_namespaces
            }
          }]
        }
        validate = {
          message = "Adding SYS_ADMIN capability is not allowed."
          deny = {
            conditions = {
              any = [
                {
                  key      = "{{ request.object.spec.containers[].securityContext.capabilities.add[] || `[]` }}"
                  operator = "AnyIn"
                  value    = ["SYS_ADMIN"]
                },
                {
                  key      = "{{ request.object.spec.initContainers[].securityContext.capabilities.add[] || `[]` }}"
                  operator = "AnyIn"
                  value    = ["SYS_ADMIN"]
                },
                {
                  key      = "{{ request.object.spec.ephemeralContainers[].securityContext.capabilities.add[] || `[]` }}"
                  operator = "AnyIn"
                  value    = ["SYS_ADMIN"]
                },
              ]
            }
          }
        }
      }]
    }
  })
  depends_on = [helm_release.kyverno]
}
# =============================================================================
# Image Pull Policy Governance
# =============================================================================
# Mutate imagePullPolicy to IfNotPresent for all containers with pinned tags
# (non-:latest). This prevents pods from getting stuck in ImagePullBackOff
# when the pull-through cache at 10.0.20.10 has transient failures.
# For :latest or untagged images, set to Always so stale images don't persist.
# ClusterPolicy "set-image-pull-policy" (admission only, background = false).
#
# Two mutate rules iterate over request.object.spec.containers and classify
# each image with the same expression: an image is "floating" when it ends in
# ":latest" or contains no ":" at all.
#   - not floating -> imagePullPolicy = IfNotPresent
#   - floating     -> imagePullPolicy = Always
# The rules are generated from the small table below, in that order. Only
# spec.containers is iterated; init containers are not touched.
resource "kubectl_manifest" "policy_set_image_pull_policy" {
  depends_on = [helm_release.kyverno]

  yaml_body = yamlencode({
    apiVersion = "kyverno.io/v1"
    kind       = "ClusterPolicy"
    metadata = {
      name = "set-image-pull-policy"
      annotations = {
        "policies.kyverno.io/title"       = "Set Image Pull Policy"
        "policies.kyverno.io/category"    = "Best Practices"
        "policies.kyverno.io/severity"    = "medium"
        "policies.kyverno.io/description" = "Set imagePullPolicy to IfNotPresent for pinned tags and Always for :latest to prevent ImagePullBackOff from transient cache failures."
      }
    }
    spec = {
      background = false
      rules = [
        for variant in [
          { rule_name = "set-ifnotpresent-for-pinned-tags", floating = false, pull_policy = "IfNotPresent" },
          { rule_name = "set-always-for-latest", floating = true, pull_policy = "Always" },
          ] : {
          name = variant.rule_name
          match = {
            any = [
              { resources = { kinds = ["Pod"] } },
            ]
          }
          mutate = {
            foreach = [
              {
                list = "request.object.spec.containers"
                preconditions = {
                  all = [
                    {
                      key      = "{{ ends_with(element.image, ':latest') || !contains(element.image, ':') }}"
                      operator = "Equals"
                      value    = variant.floating
                    },
                  ]
                }
                # Strategic merge keyed on the container name, so only the
                # current element's imagePullPolicy is rewritten.
                patchStrategicMerge = {
                  spec = {
                    containers = [
                      {
                        name            = "{{ element.name }}"
                        imagePullPolicy = variant.pull_policy
                      },
                    ]
                  }
                }
              },
            ]
          }
        }
      ]
    }
  })
}
resource "kubectl_manifest" "policy_require_trusted_registries" {
yaml_body = yamlencode({
apiVersion = "kyverno.io/v1"
kind = "ClusterPolicy"
metadata = {
name = "require-trusted-registries"
annotations = {
"policies.kyverno.io/title" = "Require Trusted Image Registries"
"policies.kyverno.io/category" = "Pod Security"
"policies.kyverno.io/severity" = "medium"
"policies.kyverno.io/description" = "Images must come from trusted registries to prevent supply chain attacks."
}
}
spec = {
# Wave 1 W1.5: flipped Audit Enforce 2026-05-19 with explicit allowlist.
# Allowlist enumerated from `kubectl get pods -A -o jsonpath='{..image}'`
# on 2026-05-18; covers all in-cluster image sources. Update on adding new
# workloads from a registry NOT in this list (and ask if the new registry
# is trusted before opening it). The `*/*` catch-all was deliberately
# removed so unknown registries fail closed at admission.
validationFailureAction = "Enforce"
background = true
rules = [{
name = "validate-registries"
match = {
any = [{
resources = {
kinds = ["Pod"]
}
}]
}
exclude = {
any = [{
resources = {
namespaces = local.security_policy_exclude_namespaces
}
}]
}
validate = {
message = "Images must be from trusted registries. Allowlist defined in stacks/kyverno/modules/kyverno/security-policies.tf — add the new registry there if intentional, otherwise switch the workload to a trusted source."
pattern = {
spec = {
containers = [{
image = join(" | ", [
# Explicit registries
"docker.io/*", "ghcr.io/*", "quay.io/*", "registry.k8s.io/*",
"gcr.io/*", "us-docker.pkg.dev/*", "lscr.io/*",
"codeberg.org/*", "mcr.microsoft.com/*", "nvcr.io/*",
"oci.external-secrets.io/*", "reg.kyverno.io/*",
"docker.n8n.io/*", "registry.gitlab.com/*",
# Private
"forgejo.viktorbarzin.me/*", "10.0.20.10*",
# Legacy private registry (decommissioned 2026-05-07 per CLAUDE.md
# but council-complaints still references migrate to Forgejo).
"registry.viktorbarzin.me/*",
# DockerHub library (bare image names without slash)
"alpine*", "busybox*", "kong*", "mysql*", "nginx*", "postgres*", "python*",
# DockerHub user repos (no registry prefix, has slash)
# enumerated from current cluster state. New entries added
# 2026-05-22 after Enforce caught these as unallowlisted:
# amruthpillai (resume), athomasson2 (ebook2audiobook),
# netboxcommunity (netbox), nousresearch (hermes-agent),
# opentripplanner (osm-routing), rhasspy (whisper/piper).
# 2026-06-05: mauriceboe (TREK group-trip planner trial).
"actualbudget/*", "afadil/*", "amruthpillai/*", "athomasson2/*",
"binwiederhier/*", "bitnami/*",
"clickhouse/*", "cloudflare/*", "coturn/*", "crowdsecurity/*",
"curlimages/*", "deluan/*", "dgtlmoon/*", "dolthub/*",
"dpage/*", "dperson/*", "edoburu/*", "esanchezm/*",
"freikin/*", "freshrss/*", "hackmdio/*", "hashicorp/*",
"headscale/*", "jhonderson/*", "kebe/*", "library/*",
"lissy93/*", "louislam/*", "matrixdotorg/*", "mauriceboe/*",
"mendhak/*",
"mghee/*", "mindflavor/*", "mpepping/*", "netboxcommunity/*",
"netsampler/*", "nousresearch/*", "nvidia/*", "onlyoffice/*",
"openresty/*", "opentripplanner/*", "owntracks/*",
"phpipam/*", "phpmyadmin/*", "privatebin/*", "prom/*",
"prompve/*", "rancher/*", "rhasspy/*", "roundcube/*", "sclevine/*",
"shadowsocks/*", "shlinkio/*", "stirlingtools/*",
"technitium/*", "teddysun/*", "temporalio/*",
"typhonragewind/*", "tzahi12345/*", "vabene1111/*",
"vaultwarden/*", "viktorbarzin/*", "viren070/*",
"woodpeckerci/*", "zelest/*",
])
}]
}
}
}
}]
}
})
depends_on = [helm_release.kyverno]
}

View file

@ -0,0 +1,72 @@
# =============================================================================
# TLS Certificate — Auto-sync to all namespaces
# =============================================================================
# Source wildcard cert (*.viktorbarzin.me) in kyverno namespace, cloned by
# ClusterPolicy into every NS. Renewal pipeline updates this source secret,
# Kyverno propagates to all namespaces within seconds.
# Source kubernetes.io/tls secret in the kyverno namespace. Certificate and
# key are read from secrets/fullchain.pem and secrets/privkey.pem under the
# root module directory.
resource "kubernetes_secret" "tls_secret" {
  type = "kubernetes.io/tls"

  metadata {
    name      = "tls-secret"
    namespace = kubernetes_namespace.kyverno.metadata[0].name
  }

  data = {
    "tls.key" = file("${path.root}/secrets/privkey.pem")
    "tls.crt" = file("${path.root}/secrets/fullchain.pem")
  }
}
# ClusterPolicy "sync-tls-secret": a generate rule triggered by Namespace
# resources (except kube-system, kube-public, kube-node-lease) that clones
# kyverno/tls-secret into the triggering namespace under the same name, with
# synchronize enabled.
resource "kubectl_manifest" "sync_tls_secret" {
  # The CRD, the source secret and the Secret RBAC must all exist first.
  depends_on = [
    helm_release.kyverno,
    kubernetes_secret.tls_secret,
    kubernetes_cluster_role_binding.kyverno_admission_secret_manager,
    kubernetes_cluster_role_binding.kyverno_background_secret_manager,
  ]

  yaml_body = yamlencode({
    apiVersion = "kyverno.io/v1"
    kind       = "ClusterPolicy"
    metadata = {
      name = "sync-tls-secret"
    }
    spec = {
      rules = [
        {
          name = "sync-tls-secret"
          match = {
            any = [
              { resources = { kinds = ["Namespace"] } },
            ]
          }
          exclude = {
            any = [
              { resources = { namespaces = ["kube-system", "kube-public", "kube-node-lease"] } },
            ]
          }
          generate = {
            # Target: Secret "tls-secret" in the namespace that triggered
            # the rule.
            apiVersion  = "v1"
            kind        = "Secret"
            name        = "tls-secret"
            namespace   = "{{request.object.metadata.name}}"
            synchronize = true
            # Source of the clone.
            clone = {
              name      = "tls-secret"
              namespace = "kyverno"
            }
          }
        },
      ]
    }
  })
}

View file

@ -0,0 +1,11 @@
# kubectl provider used by kubectl_manifest resources (swapped from
# hashicorp/kubernetes kubernetes_manifest due to provider crash on Kyverno
# ClusterPolicy CRDs, beads code-e2dp).
# Declares the gavinbunney/kubectl provider (any 1.x release from 1.14 up).
terraform {
  required_providers {
    kubectl = {
      version = "~> 1.14"
      source  = "gavinbunney/kubectl"
    }
  }
}