[ci skip] Infrastructure hardening: security, monitoring, reliability, maintainability

Phase 1 - Critical Security:
- Netbox: move hardcoded DB/superuser passwords to variables
- MeshCentral: disable public registration, add Authentik auth
- Traefik: disable insecure API dashboard (api.insecure=false)
- Traefik: configure forwarded headers with Cloudflare trusted IPs

Phase 2 - Security Hardening:
- Add security headers middleware (HSTS, X-Frame-Options, nosniff, etc.)
- Add Kyverno pod security policies in audit mode (privileged, host namespaces, SYS_ADMIN, trusted registries)
- Tighten rate limiting (avg=10, burst=50)
- Add Authentik protection to grampsweb

Phase 3 - Monitoring & Alerting:
- Add critical service alerts (PostgreSQL, MySQL, Redis, Headscale, Authentik, Loki)
- Increase Loki retention from 7 to 30 days (720h)
- Add predictive PV filling alert (predict_linear)
- Re-enable Hackmd and Privatebin down alerts

Phase 4 - Reliability:
- Add resource requests/limits to Redis, DBaaS, Technitium, Headscale, Vaultwarden, Uptime Kuma
- Increase Alloy DaemonSet memory to 512Mi/1Gi

Phase 6 - Maintainability:
- Extract duplicated tiers locals to terragrunt.hcl generate block (removed from 67 stacks)
- Replace hardcoded NFS IP 10.0.10.15 with var.nfs_server (114 instances across 63 files)
- Replace hardcoded Redis/PostgreSQL/MySQL/Ollama/mail host references with variables across ~35 stacks
- Migrate xray raw ingress resources to ingress_factory modules
2026-02-23 22:05:28 +00:00 · 2026-02-23 22:05:28 +00:00 · 89a6e08245
commit 89a6e08245
parent 1b4737c90c
104 changed files with 773 additions and 920 deletions
--- a/stacks/ytdlp/main.tf
+++ b/stacks/ytdlp/main.tf
@@ -2,16 +2,10 @@ variable "tls_secret_name" { type = string }
 variable "openrouter_api_key" { type = string }
 variable "slack_bot_token" { type = string }
 variable "slack_channel" { type = string }
+variable "nfs_server" { type = string }
+variable "redis_host" { type = string }
+variable "ollama_host" { type = string }

-locals {
-  tiers = {
-    core    = "0-core"
-    cluster = "1-cluster"
-    gpu     = "2-gpu"
-    edge    = "3-edge"
-    aux     = "4-aux"
-  }
-}

 resource "kubernetes_namespace" "ytdlp" {
  metadata {
@@ -100,7 +94,7 @@ resource "kubernetes_deployment" "ytdlp" {
          name = "data"
          nfs {
            path   = "/mnt/main/ytdlp"
-            server = "10.0.10.15"
+            server = var.nfs_server
          }
        }
        # }
@@ -247,7 +241,7 @@ resource "kubernetes_deployment" "yt_highlights" {
          }
          env {
            name  = "REDIS_URL"
-            value = "redis://redis.redis.svc.cluster.local:6379/0"
+            value = "redis://${var.redis_host}:6379/0"
          }
          # Store model cache on NFS to avoid ephemeral storage eviction
          env {
@@ -261,7 +255,7 @@ resource "kubernetes_deployment" "yt_highlights" {
          # Ollama fallback for when OpenRouter models fail
          env {
            name  = "OLLAMA_URL"
-            value = "http://ollama.ollama.svc.cluster.local:11434"
+            value = "http://${var.ollama_host}:11434"
          }
          env {
            name  = "OLLAMA_MODEL"
@@ -290,7 +284,7 @@ resource "kubernetes_deployment" "yt_highlights" {
        volume {
          name = "data"
          nfs {
-            server = "10.0.10.15"
+            server = var.nfs_server
            path   = "/mnt/main/ytdlp-highlights"
          }
        }