From 878b556179275c550d2d05bde90eaf26b51336e3 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sun, 29 Mar 2026 01:04:11 +0200 Subject: [PATCH] state(monitoring): update encrypted state --- modules/kubernetes/ingress_factory/main.tf | 13 +++++++------ .../monitoring/prometheus_chart_values.tpl | 9 ++++++++- stacks/traefik/modules/traefik/error-pages.tf | 19 +++++++++++++++++++ stacks/traefik/modules/traefik/main.tf | 14 ++++++++++++++ 4 files changed, 48 insertions(+), 7 deletions(-) diff --git a/modules/kubernetes/ingress_factory/main.tf b/modules/kubernetes/ingress_factory/main.tf index f688a549..5184a998 100644 --- a/modules/kubernetes/ingress_factory/main.tf +++ b/modules/kubernetes/ingress_factory/main.tf @@ -73,7 +73,7 @@ variable "skip_default_rate_limit" { } variable "anti_ai_scraping" { type = bool - default = true + default = null # null = auto (enabled when not protected, disabled when protected) } variable "homepage_group" { @@ -87,7 +87,8 @@ variable "homepage_enabled" { } locals { - effective_host = var.full_host != null ? var.full_host : "${var.host != null ? var.host : var.name}.${var.root_domain}" + effective_host = var.full_host != null ? var.full_host : "${var.host != null ? var.host : var.name}.${var.root_domain}" + effective_anti_ai = var.anti_ai_scraping != null ? var.anti_ai_scraping : !var.protected ns_to_group = { monitoring = "Infrastructure" @@ -165,10 +166,10 @@ resource "kubernetes_ingress_v1" "proxied-ingress" { var.skip_default_rate_limit ? null : "traefik-rate-limit@kubernetescrd", var.custom_content_security_policy == null ? "traefik-csp-headers@kubernetescrd" : null, var.exclude_crowdsec ? null : "traefik-crowdsec@kubernetescrd", - var.anti_ai_scraping ? "traefik-ai-bot-block@kubernetescrd" : null, - var.anti_ai_scraping ? "traefik-anti-ai-headers@kubernetescrd" : null, - var.anti_ai_scraping ? "traefik-strip-accept-encoding@kubernetescrd" : null, - var.anti_ai_scraping ? 
"traefik-anti-ai-trap-links@kubernetescrd" : null, + local.effective_anti_ai ? "traefik-ai-bot-block@kubernetescrd" : null, + local.effective_anti_ai ? "traefik-anti-ai-headers@kubernetescrd" : null, + local.effective_anti_ai ? "traefik-strip-accept-encoding@kubernetescrd" : null, + local.effective_anti_ai ? "traefik-anti-ai-trap-links@kubernetescrd" : null, var.protected ? "traefik-authentik-forward-auth@kubernetescrd" : null, var.allow_local_access_only ? "traefik-local-only@kubernetescrd" : null, var.rybbit_site_id != null ? "traefik-strip-accept-encoding@kubernetescrd" : null, diff --git a/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl b/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl index 81b35280..c0df184b 100755 --- a/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl +++ b/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl @@ -1357,9 +1357,16 @@ serverFiles: expr: (traefik_tls_certs_not_after - time()) / 86400 < 7 for: 1h labels: - severity: warning + severity: critical annotations: summary: "TLS cert {{ $labels.cn }} expires in {{ $value | printf \"%.0f\" }} days" + - alert: TLSCertRenewalOverdue + expr: (traefik_tls_certs_not_after - time()) / 86400 < 30 + for: 1h + labels: + severity: warning + annotations: + summary: "TLS cert {{ $labels.cn }} expires in {{ $value | printf \"%.0f\" }} days — renewal may have failed (LE certs valid 90d, renewed at 60d)" - alert: TraefikHighOpenConnections expr: sum(traefik_service_open_connections) by (service) > 500 for: 5m diff --git a/stacks/traefik/modules/traefik/error-pages.tf b/stacks/traefik/modules/traefik/error-pages.tf index 13eab3c2..7d76fcee 100644 --- a/stacks/traefik/modules/traefik/error-pages.tf +++ b/stacks/traefik/modules/traefik/error-pages.tf @@ -139,6 +139,25 @@ resource "kubernetes_manifest" "middleware_error_pages" { depends_on = [helm_release.traefik, kubernetes_service.error_pages] } +# Default TLSStore — serves wildcard cert for 
unknown hosts instead of self-signed fallback +resource "kubernetes_manifest" "tlsstore_default" { + manifest = { + apiVersion = "traefik.io/v1alpha1" + kind = "TLSStore" + metadata = { + name = "default" + namespace = kubernetes_namespace.traefik.metadata[0].name + } + spec = { + defaultCertificate = { + secretName = var.tls_secret_name + } + } + } + + depends_on = [helm_release.traefik, module.tls_secret] +} + # Catch-all IngressRoute — serves 404 for unknown hosts (lowest priority) resource "kubernetes_manifest" "ingressroute_catchall" { manifest = { diff --git a/stacks/traefik/modules/traefik/main.tf b/stacks/traefik/modules/traefik/main.tf index 887c95e0..bfaf5683 100644 --- a/stacks/traefik/modules/traefik/main.tf +++ b/stacks/traefik/modules/traefik/main.tf @@ -34,6 +34,14 @@ resource "helm_release" "traefik" { values = [yamlencode({ deployment = { replicas = 3 + terminationGracePeriodSeconds = 60 + lifecycle = { + preStop = { + exec = { + command = ["/bin/sh", "-c", "sleep 15"] + } + } + } podAnnotations = { "diun.enable" = "true" "diun.include_tags" = "^v\\d+(?:\\.\\d+)?(?:\\.\\d+)?.*$" @@ -193,6 +201,12 @@ resource "helm_release" "traefik" { "--serversTransport.forwardingTimeouts.dialTimeout=60s", "--serversTransport.forwardingTimeouts.responseHeaderTimeout=30s", "--serversTransport.forwardingTimeouts.idleConnTimeout=90s", + # Set the backend idle-connection pool size explicitly (Go's net/http default is 2 per host). NOTE(review): Traefik's CLI reference lists 200 as its own default for this flag — confirm 100 is not a reduction + "--serversTransport.maxIdleConnsPerHost=100", + # Explicit entrypoint timeouts to bound tail latency from slow clients. NOTE(review): writeTimeout bounds the whole response write, so downloads/SSE streams longer than 60s may be cut — confirm + "--entryPoints.websecure.transport.respondingTimeouts.readTimeout=60s", + "--entryPoints.websecure.transport.respondingTimeouts.writeTimeout=60s", + "--entryPoints.websecure.transport.respondingTimeouts.idleTimeout=180s", # Use forwarded headers from trusted proxies "--entryPoints.websecure.forwardedHeaders.insecure=false", "--entryPoints.web.forwardedHeaders.insecure=false",