# infra/stacks/traefik/modules/traefik/error-pages.tf

# Custom error pages using tarampampam/error-pages.
# Serves themed error pages for 5xx errors and the catch-all 404 for unknown hosts.
# Runs two replicas in the traefik namespace, listening on container port 8080.
resource "kubernetes_deployment" "error_pages" {
  metadata {
    name      = "error-pages"
    namespace = kubernetes_namespace.traefik.metadata[0].name
    labels = {
      app = "error-pages"
    }
  }

  spec {
    replicas = 2

    # Surge one new pod before removing an old one; never drop below the
    # desired replica count during a rollout.
    strategy {
      type = "RollingUpdate"
      rolling_update {
        max_unavailable = 0
        max_surge       = 1
      }
    }

    selector {
      match_labels = {
        app = "error-pages"
      }
    }

    template {
      metadata {
        labels = {
          app = "error-pages"
        }
        annotations = {
          # NOTE(review): presumably opts this pod into Diun image-update
          # notifications for the floating ":3" tag — confirm.
          "diun.enable" = "true"
        }
      }

      spec {
        # Spread the replicas across nodes (by hostname); pods stay Pending
        # rather than violating the max skew of 1.
        topology_spread_constraint {
          max_skew           = 1
          topology_key       = "kubernetes.io/hostname"
          when_unsatisfiable = "DoNotSchedule"
          label_selector {
            match_labels = {
              app = "error-pages"
            }
          }
        }

        container {
          name  = "error-pages"
          image = "ghcr.io/tarampampam/error-pages:3"

          port {
            container_port = 8080
          }

          # Template selection passed to the error-pages image.
          env {
            name  = "TEMPLATE_NAME"
            value = "shuffle"
          }

          liveness_probe {
            http_get {
              path = "/healthz"
              port = 8080
            }
            initial_delay_seconds = 3
            period_seconds        = 10
          }

          readiness_probe {
            http_get {
              path = "/healthz"
              port = 8080
            }
            initial_delay_seconds = 2
            period_seconds        = 5
          }

          # Memory request equals the memory limit; no CPU limit is set.
          resources {
            requests = {
              cpu    = "5m"
              memory = "32Mi"
            }
            limits = {
              memory = "32Mi"
            }
          }
        }
      }
    }
  }

  # Kyverno's admission webhook mutates the pod dns_config (ndots override),
  # which would otherwise show up as perpetual plan drift. ignore_changes only
  # accepts static attribute paths, so this snippet is repeated per resource and
  # tagged with the KYVERNO_LIFECYCLE_V1 marker so all sites can be found by grep.
  lifecycle {
    ignore_changes = [spec[0].template[0].spec[0].dns_config] # KYVERNO_LIFECYCLE_V1
  }
}
# Service in the traefik namespace that selects the pods labelled
# app=error-pages and exposes them on port 8080 under the port name "http".
resource "kubernetes_service" "error_pages" {
  metadata {
    namespace = kubernetes_namespace.traefik.metadata[0].name
    name      = "error-pages"

    labels = {
      app = "error-pages"
    }
  }

  spec {
    # Service port and container port are the same (8080).
    port {
      port        = 8080
      target_port = 8080
      name        = "http"
    }

    selector = {
      app = "error-pages"
    }
  }
}
# Traefik "errors" Middleware: for backend responses with status 500-504,
# the error-pages service is queried at "/{status}" to serve a themed page.
resource "kubernetes_manifest" "middleware_error_pages" {
  # The helm release and the backing Service are declared as prerequisites.
  depends_on = [
    helm_release.traefik,
    kubernetes_service.error_pages,
  ]

  manifest = {
    apiVersion = "traefik.io/v1alpha1"
    kind       = "Middleware"

    metadata = {
      namespace = kubernetes_namespace.traefik.metadata[0].name
      name      = "error-pages"
    }

    spec = {
      errors = {
        query  = "/{status}"
        status = ["500-504"]

        service = {
          namespace = kubernetes_namespace.traefik.metadata[0].name
          name      = "error-pages"
          port      = 8080
        }
      }
    }
  }
}
# TLSStore named "default": sets the default certificate to the secret named
# by var.tls_secret_name, so unknown hosts are served the wildcard cert
# instead of the self-signed fallback.
resource "kubernetes_manifest" "tlsstore_default" {
  # The helm release and the TLS secret module are declared as prerequisites.
  depends_on = [
    helm_release.traefik,
    module.tls_secret,
  ]

  manifest = {
    kind       = "TLSStore"
    apiVersion = "traefik.io/v1alpha1"

    metadata = {
      namespace = kubernetes_namespace.traefik.metadata[0].name
      name      = "default"
    }

    spec = {
      defaultCertificate = {
        secretName = var.tls_secret_name
      }
    }
  }
}
# Catch-all IngressRoute (priority 1, i.e. lowest): any viktorbarzin.me host
# (apex or subdomain) not claimed by another router is sent to error-pages,
# which serves the 404 page.
# Hosts outside viktorbarzin.me match no router and get TLS rejection, which
# avoids leaking the wildcard cert to arbitrary domains pointed at our IP.
resource "kubernetes_manifest" "ingressroute_catchall" {
  # The helm release and the backing Service are declared as prerequisites.
  depends_on = [
    helm_release.traefik,
    kubernetes_service.error_pages,
  ]

  manifest = {
    kind       = "IngressRoute"
    apiVersion = "traefik.io/v1alpha1"

    metadata = {
      namespace = kubernetes_namespace.traefik.metadata[0].name
      name      = "catchall-error-pages"
    }

    spec = {
      entryPoints = ["websecure"]
      tls         = {}

      routes = [{
        kind     = "Rule"
        priority = 1
        match    = "HostRegexp(`^(.+\\.)?viktorbarzin\\.me$`)"

        # Middleware order is kept as declared: rate-limit, then crowdsec.
        middlewares = [
          {
            name      = "rate-limit"
            namespace = kubernetes_namespace.traefik.metadata[0].name
          },
          {
            name      = "crowdsec"
            namespace = kubernetes_namespace.traefik.metadata[0].name
          },
        ]

        services = [{
          namespace = kubernetes_namespace.traefik.metadata[0].name
          name      = "error-pages"
          port      = 8080
        }]
      }]
    }
  }
}