[ci skip] Infrastructure hardening: security, monitoring, reliability, maintainability

Phase 1 - Critical Security:
- NetBox: move hardcoded DB/superuser passwords to variables
- MeshCentral: disable public registration, add Authentik auth
- Traefik: disable insecure API dashboard (api.insecure=false)
- Traefik: configure forwarded headers with Cloudflare trusted IPs

Phase 2 - Security Hardening:
- Add security headers middleware (HSTS, X-Frame-Options, nosniff, etc.)
- Add Kyverno pod security policies in audit mode (covering privileged
  containers, host namespaces, SYS_ADMIN, and trusted registries)
- Tighten rate limiting (avg=10, burst=50)
- Add Authentik protection to grampsweb

Phase 3 - Monitoring & Alerting:
- Add critical service alerts (PostgreSQL, MySQL, Redis, Headscale,
  Authentik, Loki)
- Increase Loki retention from 7 to 30 days (720h)
- Add predictive PV filling alert (predict_linear)
- Re-enable HackMD and PrivateBin down alerts

Phase 4 - Reliability:
- Add resource requests/limits to Redis, DBaaS, Technitium, Headscale,
  Vaultwarden, Uptime Kuma
- Increase Alloy DaemonSet memory to 512Mi request / 1Gi limit

Phase 6 - Maintainability:
- Extract duplicated tiers locals to terragrunt.hcl generate block
  (removed from 67 stacks)
- Replace hardcoded NFS IP 10.0.10.15 with var.nfs_server (114
  instances across 63 files)
- Replace hardcoded Redis/PostgreSQL/MySQL/Ollama/mail host references
  with variables across ~35 stacks
- Migrate xray raw ingress resources to ingress_factory modules
This commit is contained in:
Viktor Barzin 2026-02-23 22:05:28 +00:00
parent 1b4737c90c
commit 89a6e08245
104 changed files with 773 additions and 920 deletions

View file

@ -3,21 +3,14 @@ variable "immich_postgresql_password" { type = string }
variable "immich_frame_api_key" { type = string }
variable "homepage_credentials" { type = map(any) }
locals {
tiers = {
core = "0-core"
cluster = "1-cluster"
gpu = "2-gpu"
edge = "3-edge"
aux = "4-aux"
}
}
variable "immich_version" {
type = string
# Change me to upgrade
default = "v2.5.6"
}
variable "nfs_server" { type = string }
variable "redis_host" { type = string }
module "tls_secret" {
@ -104,7 +97,7 @@ resource "kubernetes_deployment" "immich_server" {
}
env {
name = "REDIS_HOSTNAME"
value = "redis.redis.svc.cluster.local"
value = var.redis_host
}
liveness_probe {
@ -176,7 +169,7 @@ resource "kubernetes_deployment" "immich_server" {
# volume {
# name = "library-old"
# nfs {
# server = "10.0.10.15"
# server = var.nfs_server
# path = "/mnt/main/immich/immich/"
# }
# }
@ -184,42 +177,42 @@ resource "kubernetes_deployment" "immich_server" {
volume {
name = "backups"
nfs {
server = "10.0.10.15"
server = var.nfs_server
path = "/mnt/main/immich/immich/backups"
}
}
volume {
name = "encoded-video"
nfs {
server = "10.0.10.15"
server = var.nfs_server
path = "/mnt/main/immich/immich/encoded-video"
}
}
volume {
name = "library"
nfs {
server = "10.0.10.15"
server = var.nfs_server
path = "/mnt/main/immich/immich/library"
}
}
volume {
name = "profile"
nfs {
server = "10.0.10.15"
server = var.nfs_server
path = "/mnt/main/immich/immich/profile"
}
}
volume {
name = "thumbs"
nfs {
server = "10.0.10.15"
server = var.nfs_server
path = "/mnt/ssd/immich/thumbs"
}
}
volume {
name = "upload"
nfs {
server = "10.0.10.15"
server = var.nfs_server
path = "/mnt/main/immich/immich/upload"
}
}
@ -305,7 +298,7 @@ resource "kubernetes_deployment" "immich-postgres" {
name = "postgresql-persistent-storage"
nfs {
path = "/mnt/main/immich/data-immich-postgresql"
server = "10.0.10.15"
server = var.nfs_server
}
}
}
@ -442,7 +435,7 @@ resource "kubernetes_deployment" "immich-machine-learning" {
nfs {
# path = "/mnt/main/immich/machine-learning"
path = "/mnt/ssd/immich/machine-learning" # load cache from ssd
server = "10.0.10.15"
server = var.nfs_server
}
}
}
@ -533,7 +526,7 @@ resource "kubernetes_cron_job_v1" "postgresql-backup" {
name = "postgresql-backup"
nfs {
path = "/mnt/main/immich/data-immich-postgresql"
server = "10.0.10.15"
server = var.nfs_server
}
}
}