diff --git a/.woodpecker/default.yml b/.woodpecker/default.yml
index 3e083ac5..5b96f74f 100644
--- a/.woodpecker/default.yml
+++ b/.woodpecker/default.yml
@@ -45,18 +45,15 @@ steps:
- "chmod 755 /usr/local/bin/terragrunt"
# Source Vault token
- "source .vault-env"
- # Apply extracted stacks in parallel
- - "cd stacks/dbaas && terragrunt apply --non-interactive -auto-approve &"
- - "cd stacks/authentik && terragrunt apply --non-interactive -auto-approve &"
- - "cd stacks/crowdsec && terragrunt apply --non-interactive -auto-approve &"
- - "cd stacks/monitoring && terragrunt apply --non-interactive -auto-approve &"
- - "cd stacks/nvidia && terragrunt apply --non-interactive -auto-approve &"
- - "cd stacks/mailserver && terragrunt apply --non-interactive -auto-approve &"
- - "cd stacks/cloudflared && terragrunt apply --non-interactive -auto-approve &"
- - "cd stacks/kyverno && terragrunt apply --non-interactive -auto-approve &"
- # Apply platform stack (remaining core infrastructure services)
- - "cd stacks/platform && terragrunt apply --non-interactive -auto-approve"
- - "wait"
+ # Apply all platform stacks in parallel; fail this CI step if ANY apply fails
+ - |
+ for stack in dbaas authentik crowdsec monitoring nvidia mailserver cloudflared kyverno \
+ metallb redis traefik technitium headscale rbac k8s-portal vaultwarden \
+ reverse-proxy metrics-server vpa nfs-csi iscsi-csi cnpg sealed-secrets \
+ uptime-kuma wireguard xray infra-maintenance platform; do
+ (cd stacks/$stack && terragrunt apply --non-interactive -auto-approve) & pids="$pids $!"
+ done
+ for pid in $pids; do wait "$pid" || exit 1; done
- name: cleanup-and-push
image: alpine
diff --git a/stacks/cnpg/main.tf b/stacks/cnpg/main.tf
new file mode 100644
index 00000000..cdb77325
--- /dev/null
+++ b/stacks/cnpg/main.tf
@@ -0,0 +1,4 @@
+module "cnpg" {
+ source = "./modules/cnpg"
+ tier = local.tiers.cluster
+}
diff --git a/stacks/cnpg/modules/cnpg/main.tf b/stacks/cnpg/modules/cnpg/main.tf
new file mode 100644
index 00000000..64a1d730
--- /dev/null
+++ b/stacks/cnpg/modules/cnpg/main.tf
@@ -0,0 +1,53 @@
+variable "tier" { type = string }
+
+# -----------------------------------------------------------------------------
+# Namespace
+# -----------------------------------------------------------------------------
+resource "kubernetes_namespace" "cnpg_system" {
+ metadata {
+ name = "cnpg-system"
+ labels = {
+ tier = var.tier
+ }
+ }
+}
+
+# -----------------------------------------------------------------------------
+# CloudNativePG Operator — manages PostgreSQL clusters via CRDs
+# https://cloudnative-pg.io/
+# -----------------------------------------------------------------------------
+resource "helm_release" "cnpg" {
+ namespace = kubernetes_namespace.cnpg_system.metadata[0].name
+ create_namespace = false
+ name = "cnpg"
+ atomic = true
+ timeout = 300
+
+ repository = "https://cloudnative-pg.github.io/charts"
+ chart = "cloudnative-pg"
+ version = "0.27.1"
+
+ values = [yamlencode({
+ crds = {
+ create = true
+ }
+
+ replicaCount = 1
+
+ resources = {
+ requests = {
+ cpu = "100m"
+ memory = "256Mi"
+ }
+ limits = {
+ memory = "256Mi"
+ }
+ }
+ })]
+}
+
+# NOTE: local-path-provisioner is already installed in the cluster
+# (via cloud-init template) with StorageClass "local-path" (default).
+# ReclaimPolicy is "Delete" — for CNPG clusters, set
+# .spec.storage.pvcTemplate.storageClassName = "local-path" in the
+# Cluster CR. CNPG handles PVC lifecycle independently.
diff --git a/stacks/cnpg/secrets b/stacks/cnpg/secrets
new file mode 120000
index 00000000..ca54a7cf
--- /dev/null
+++ b/stacks/cnpg/secrets
@@ -0,0 +1 @@
+../../secrets
\ No newline at end of file
diff --git a/stacks/cnpg/terragrunt.hcl b/stacks/cnpg/terragrunt.hcl
new file mode 100644
index 00000000..4f16dddf
--- /dev/null
+++ b/stacks/cnpg/terragrunt.hcl
@@ -0,0 +1,8 @@
+include "root" {
+ path = find_in_parent_folders()
+}
+
+dependency "infra" {
+ config_path = "../infra"
+ skip_outputs = true
+}
diff --git a/stacks/cnpg/tiers.tf b/stacks/cnpg/tiers.tf
new file mode 100644
index 00000000..eb0f8083
--- /dev/null
+++ b/stacks/cnpg/tiers.tf
@@ -0,0 +1,10 @@
+# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa
+locals {
+ tiers = {
+ core = "0-core"
+ cluster = "1-cluster"
+ gpu = "2-gpu"
+ edge = "3-edge"
+ aux = "4-aux"
+ }
+}
diff --git a/stacks/headscale/main.tf b/stacks/headscale/main.tf
new file mode 100644
index 00000000..a0c731f3
--- /dev/null
+++ b/stacks/headscale/main.tf
@@ -0,0 +1,21 @@
+variable "tls_secret_name" { type = string }
+variable "nfs_server" { type = string }
+
+data "vault_kv_secret_v2" "secrets" {
+ mount = "secret"
+ name = "platform"
+}
+
+locals {
+ homepage_credentials = jsondecode(data.vault_kv_secret_v2.secrets.data["homepage_credentials"])
+}
+
+module "headscale" {
+ source = "./modules/headscale"
+ tls_secret_name = var.tls_secret_name
+ nfs_server = var.nfs_server
+ headscale_config = data.vault_kv_secret_v2.secrets.data["headscale_config"]
+ headscale_acl = data.vault_kv_secret_v2.secrets.data["headscale_acl"]
+ homepage_token = try(local.homepage_credentials["headscale"]["api_key"], "")
+ tier = local.tiers.core
+}
diff --git a/stacks/headscale/modules/headscale/main.tf b/stacks/headscale/modules/headscale/main.tf
new file mode 100644
index 00000000..4d5e9dc6
--- /dev/null
+++ b/stacks/headscale/modules/headscale/main.tf
@@ -0,0 +1,324 @@
+
+variable "tls_secret_name" {}
+variable "tier" { type = string }
+variable "headscale_config" {}
+variable "headscale_acl" {}
+variable "nfs_server" { type = string }
+variable "homepage_token" {
+ type = string
+ default = ""
+ sensitive = true
+}
+
+resource "kubernetes_namespace" "headscale" {
+ metadata {
+ name = "headscale"
+ labels = {
+ tier = var.tier
+ }
+ }
+}
+
+module "tls_secret" {
+ source = "../../../../modules/kubernetes/setup_tls_secret"
+ namespace = kubernetes_namespace.headscale.metadata[0].name
+ tls_secret_name = var.tls_secret_name
+}
+
+module "nfs_data" {
+ source = "../../../../modules/kubernetes/nfs_volume"
+ name = "headscale-data"
+ namespace = kubernetes_namespace.headscale.metadata[0].name
+ nfs_server = var.nfs_server
+ nfs_path = "/mnt/main/headscale"
+}
+
+resource "kubernetes_deployment" "headscale" {
+ metadata {
+ name = "headscale"
+ namespace = kubernetes_namespace.headscale.metadata[0].name
+ labels = {
+ app = "headscale"
+ tier = var.tier
+ # scared to try enabling istio-injection: the non-HTTP (UDP/DERP) traffic would probably break
+ # "istio-injection" : "enabled"
+ }
+
+ annotations = {
+ "reloader.stakater.com/search" = "true"
+ }
+ }
+ spec {
+ replicas = 1
+ strategy {
+ type = "Recreate"
+ }
+ selector {
+ match_labels = {
+ app = "headscale"
+ }
+ }
+ template {
+ metadata {
+ labels = {
+ app = "headscale"
+ }
+ annotations = {
+ # "diun.enable" = "true"
+ "diun.enable" = "false"
+ "diun.include_tags" = "^\\d+(?:\\.\\d+)?(?:\\.\\d+)?$"
+ }
+ }
+ spec {
+ container {
+ image = "headscale/headscale:0.23.0"
+ # image = "headscale/headscale:0.23.0-debug" # -debug is for debug images
+ name = "headscale"
+ command = ["headscale", "serve"]
+
+ resources {
+ requests = {
+ cpu = "50m"
+ memory = "128Mi"
+ }
+ limits = {
+ memory = "128Mi"
+ }
+ }
+
+ port {
+ container_port = 8080
+ }
+ port {
+ container_port = 9090
+ }
+ port {
+ container_port = 41641
+ }
+
+ liveness_probe {
+ http_get {
+ path = "/health"
+ port = 8080
+ }
+ initial_delay_seconds = 15
+ period_seconds = 30
+ timeout_seconds = 5
+ failure_threshold = 5
+ }
+ readiness_probe {
+ http_get {
+ path = "/health"
+ port = 8080
+ }
+ initial_delay_seconds = 5
+ period_seconds = 30
+ timeout_seconds = 5
+ failure_threshold = 3
+ }
+
+ volume_mount {
+ name = "config-volume"
+ mount_path = "/etc/headscale"
+ }
+
+ volume_mount {
+ mount_path = "/mnt"
+ name = "nfs-config"
+ }
+ }
+ volume {
+ name = "config-volume"
+ config_map {
+ name = "headscale-config"
+ items {
+ key = "config.yaml"
+ path = "config.yaml"
+ }
+ items {
+ key = "acl.yaml"
+ path = "acl.yaml"
+ }
+ }
+ }
+
+ volume {
+ name = "nfs-config"
+ persistent_volume_claim {
+ claim_name = module.nfs_data.claim_name
+ }
+ }
+ # container {
+ # image = "simcu/headscale-ui:0.1.4"
+ # name = "headscale-ui"
+ # port {
+ # container_port = 80
+ # }
+ # }
+ container {
+ image = "ghcr.io/gurucomputing/headscale-ui:latest" # TODO: pin a version tag for reproducible deploys
+ # image = "ghcr.io/tale/headplane:0.3.2"
+ name = "headscale-ui"
+
+ resources {
+ requests = {
+ cpu = "25m"
+ memory = "128Mi"
+ }
+ limits = {
+ memory = "128Mi"
+ }
+ }
+
+ port {
+ container_port = 8081
+ # container_port = 3000
+ }
+ env {
+ name = "HTTP_PORT"
+ value = "8081"
+ }
+ # env {
+ # name = "HTTPS_PORT"
+ # value = "8082"
+ # }
+ env {
+ name = "HEADSCALE_URL"
+ value = "http://localhost:8080"
+ }
+ env {
+ name = "COOKIE_SECRET"
+ value = "kekekekke" # FIXME: hardcoded placeholder secret committed in plain text — source from Vault like headscale_config
+ }
+ env {
+ name = "ROOT_API_KEY"
+ value = "kekekekeke" # FIXME: hardcoded placeholder API key committed in plain text — source from Vault
+ }
+ }
+ dns_config {
+ option {
+ name = "ndots"
+ value = "2"
+ }
+ }
+ }
+ }
+ }
+}
+resource "kubernetes_service" "headscale" {
+ metadata {
+ name = "headscale"
+ namespace = kubernetes_namespace.headscale.metadata[0].name
+ labels = {
+ "app" = "headscale"
+ }
+ annotations = {
+ "prometheus.io/scrape" = "true"
+ "prometheus.io/port" = "9090"
+ }
+ # annotations = {
+ # "metallb.universe.tf/allow-shared-ip" : "shared"
+ # }
+ }
+
+ spec {
+ # type = "LoadBalancer"
+ # external_traffic_policy = "Cluster"
+ selector = {
+ app = "headscale"
+
+ }
+ port {
+ name = "headscale"
+ port = "8080"
+ protocol = "TCP"
+ }
+ port {
+ name = "headscale-ui"
+ port = "80"
+ target_port = 8081
+ # target_port = 3000
+ protocol = "TCP"
+ }
+ port {
+ name = "metrics"
+ port = "9090"
+ protocol = "TCP"
+ }
+ }
+}
+
+module "ingress" {
+ source = "../../../../modules/kubernetes/ingress_factory"
+ namespace = kubernetes_namespace.headscale.metadata[0].name
+ name = "headscale"
+ port = 8080
+ tls_secret_name = var.tls_secret_name
+ extra_annotations = {
+ "gethomepage.dev/enabled" = "true"
+ "gethomepage.dev/name" = "Headscale"
+ "gethomepage.dev/description" = "VPN mesh network"
+ "gethomepage.dev/icon" = "headscale.png"
+ "gethomepage.dev/group" = "Identity & Security"
+ "gethomepage.dev/pod-selector" = ""
+ }
+}
+
+module "ingress-ui" {
+ source = "../../../../modules/kubernetes/ingress_factory"
+ namespace = kubernetes_namespace.headscale.metadata[0].name
+ name = "headscale-ui"
+ host = "headscale"
+ service_name = "headscale"
+ port = 8081
+ ingress_path = ["/web"]
+ tls_secret_name = var.tls_secret_name
+}
+
+resource "kubernetes_service" "headscale-server" {
+ metadata {
+ name = "headscale-server"
+ namespace = kubernetes_namespace.headscale.metadata[0].name
+ labels = {
+ "app" = "headscale"
+ }
+ annotations = {
+ "metallb.universe.tf/allow-shared-ip" : "shared"
+ }
+ }
+
+ spec {
+ type = "LoadBalancer"
+ external_traffic_policy = "Cluster"
+ selector = {
+ app = "headscale"
+
+ }
+ # port {
+ # name = "headscale-tcp"
+ # port = "41641"
+ # protocol = "TCP"
+ # }
+ port {
+ name = "headscale-udp"
+ port = "41641"
+ protocol = "UDP"
+ }
+ }
+}
+
+resource "kubernetes_config_map" "headscale-config" {
+ metadata {
+ name = "headscale-config"
+ namespace = kubernetes_namespace.headscale.metadata[0].name
+
+ annotations = {
+ "reloader.stakater.com/match" = "true"
+ }
+ }
+
+ data = {
+ "config.yaml" = var.headscale_config
+ "acl.yaml" = var.headscale_acl
+ }
+}
diff --git a/stacks/headscale/secrets b/stacks/headscale/secrets
new file mode 120000
index 00000000..ca54a7cf
--- /dev/null
+++ b/stacks/headscale/secrets
@@ -0,0 +1 @@
+../../secrets
\ No newline at end of file
diff --git a/stacks/headscale/terragrunt.hcl b/stacks/headscale/terragrunt.hcl
new file mode 100644
index 00000000..4f16dddf
--- /dev/null
+++ b/stacks/headscale/terragrunt.hcl
@@ -0,0 +1,8 @@
+include "root" {
+ path = find_in_parent_folders()
+}
+
+dependency "infra" {
+ config_path = "../infra"
+ skip_outputs = true
+}
diff --git a/stacks/headscale/tiers.tf b/stacks/headscale/tiers.tf
new file mode 100644
index 00000000..eb0f8083
--- /dev/null
+++ b/stacks/headscale/tiers.tf
@@ -0,0 +1,10 @@
+# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa
+locals {
+ tiers = {
+ core = "0-core"
+ cluster = "1-cluster"
+ gpu = "2-gpu"
+ edge = "3-edge"
+ aux = "4-aux"
+ }
+}
diff --git a/stacks/infra-maintenance/main.tf b/stacks/infra-maintenance/main.tf
new file mode 100644
index 00000000..7faba6ea
--- /dev/null
+++ b/stacks/infra-maintenance/main.tf
@@ -0,0 +1,15 @@
+variable "nfs_server" { type = string }
+
+data "vault_kv_secret_v2" "secrets" {
+ mount = "secret"
+ name = "platform"
+}
+
+module "infra-maintenance" {
+ source = "./modules/infra-maintenance"
+ nfs_server = var.nfs_server
+ git_user = data.vault_kv_secret_v2.secrets.data["webhook_handler_git_user"]
+ git_token = data.vault_kv_secret_v2.secrets.data["webhook_handler_git_token"]
+ technitium_username = data.vault_kv_secret_v2.secrets.data["technitium_username"]
+ technitium_password = data.vault_kv_secret_v2.secrets.data["technitium_password"]
+}
diff --git a/stacks/infra-maintenance/modules/infra-maintenance/main.tf b/stacks/infra-maintenance/modules/infra-maintenance/main.tf
new file mode 100644
index 00000000..abf50453
--- /dev/null
+++ b/stacks/infra-maintenance/modules/infra-maintenance/main.tf
@@ -0,0 +1,274 @@
+# Module to run some infra-specific things like updating the public ip
+variable "git_user" {}
+variable "git_token" {}
+variable "technitium_username" {}
+variable "technitium_password" {}
+variable "nfs_server" { type = string }
+
+
+# DISABLED WHILST USING CLOUDFLARE NS
+# resource "kubernetes_cron_job_v1" "update-public-ip" {
+# metadata {
+# name = "update-public-ip"
+# namespace = "default"
+# }
+# spec {
+# schedule = "*/5 * * * *"
+# successful_jobs_history_limit = 1
+# failed_jobs_history_limit = 1
+# concurrency_policy = "Forbid"
+# job_template {
+# metadata {
+# name = "update-public-ip"
+# }
+# spec {
+# template {
+# metadata {
+# name = "update-public-ip"
+# }
+# spec {
+# priority_class_name = "system-cluster-critical"
+# container {
+# name = "update-public-ip"
+# image = "viktorbarzin/infra"
+# command = ["./infra_cli"]
+# args = ["-use-case", "update-public-ip"]
+
+# env {
+# name = "GIT_USER"
+# value = var.git_user
+# }
+# env {
+# name = "GIT_TOKEN"
+# value = var.git_token
+# }
+# env {
+# name = "TECHNITIUM_USERNAME"
+# value = var.technitium_username
+# }
+# env {
+# name = "TECHNITIUM_PASSWORD"
+# value = var.technitium_password
+# }
+# }
+# restart_policy = "Never"
+# # service_account_name = "descheduler-sa"
+# # volume {
+# # name = "policy-volume"
+# # config_map {
+# # name = "policy-configmap"
+# # }
+# # }
+# }
+# }
+# }
+# }
+# }
+# }
+
+module "nfs_etcd_backup" {
+ source = "../../../../modules/kubernetes/nfs_volume"
+ name = "infra-etcd-backup"
+ namespace = "default"
+ nfs_server = var.nfs_server
+ nfs_path = "/mnt/main/etcd-backup"
+}
+
+# # backup etcd
+resource "kubernetes_cron_job_v1" "backup-etcd" {
+ metadata {
+ name = "backup-etcd"
+ namespace = "default"
+ }
+ spec {
+ schedule = "0 0 * * *"
+ successful_jobs_history_limit = 1
+ failed_jobs_history_limit = 1
+ concurrency_policy = "Forbid"
+ job_template {
+ metadata {
+ name = "backup-etcd"
+ }
+ spec {
+ template {
+ metadata {
+ name = "backup-etcd"
+ }
+ spec {
+ node_name = "k8s-master"
+ priority_class_name = "system-cluster-critical"
+ host_network = true
+ container {
+ name = "backup-etcd"
+ image = "registry.k8s.io/etcd:3.5.21-0"
+ command = ["etcdctl"]
+ args = ["--endpoints=https://127.0.0.1:2379", "--cacert=/etc/kubernetes/pki/etcd/ca.crt", "--cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt", "--key=/etc/kubernetes/pki/etcd/healthcheck-client.key", "snapshot", "save", "/backup/etcd-snapshot-latest.db"] # NOTE(review): fixed filename keeps only ONE snapshot; the 30-day '*.db' purge in the sibling container can never match — consider a timestamped name
+ env {
+ name = "ETCDCTL_API"
+ value = "3"
+ }
+ volume_mount {
+ mount_path = "/backup"
+ name = "backup"
+ }
+ volume_mount {
+ mount_path = "/etc/kubernetes/pki/etcd"
+ name = "etcd-certs"
+ read_only = true
+ }
+ }
+ container {
+ name = "backup-purge"
+ image = "busybox:1.31.1"
+ command = ["/bin/sh"]
+ args = ["-c", "find /backup -type f -mtime +30 -name '*.db' -exec rm -- '{}' \\;"]
+
+ volume_mount {
+ mount_path = "/backup"
+ name = "backup"
+ }
+ }
+
+ volume {
+ name = "backup"
+ persistent_volume_claim {
+ claim_name = module.nfs_etcd_backup.claim_name
+ }
+ }
+ volume {
+ name = "etcd-certs"
+ host_path {
+ path = "/etc/kubernetes/pki/etcd"
+ type = "DirectoryOrCreate"
+ }
+ }
+ restart_policy = "Never"
+ }
+ }
+ }
+ }
+ }
+}
+
+# Weekly etcd defragmentation — prevents fragmentation buildup that causes slow requests
+resource "kubernetes_cron_job_v1" "defrag-etcd" {
+ metadata {
+ name = "defrag-etcd"
+ namespace = "default"
+ }
+ spec {
+ schedule = "0 3 * * 0"
+ successful_jobs_history_limit = 1
+ failed_jobs_history_limit = 1
+ concurrency_policy = "Forbid"
+ job_template {
+ metadata {
+ name = "defrag-etcd"
+ }
+ spec {
+ template {
+ metadata {
+ name = "defrag-etcd"
+ }
+ spec {
+ node_name = "k8s-master"
+ priority_class_name = "system-cluster-critical"
+ host_network = true
+ container {
+ name = "defrag-etcd"
+ image = "registry.k8s.io/etcd:3.5.21-0"
+ command = ["etcdctl"]
+ args = ["--endpoints=https://127.0.0.1:2379", "--cacert=/etc/kubernetes/pki/etcd/ca.crt", "--cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt", "--key=/etc/kubernetes/pki/etcd/healthcheck-client.key", "--command-timeout=60s", "defrag"]
+ env {
+ name = "ETCDCTL_API"
+ value = "3"
+ }
+ volume_mount {
+ mount_path = "/etc/kubernetes/pki/etcd"
+ name = "etcd-certs"
+ read_only = true
+ }
+ }
+ volume {
+ name = "etcd-certs"
+ host_path {
+ path = "/etc/kubernetes/pki/etcd"
+ type = "DirectoryOrCreate"
+ }
+ }
+ restart_policy = "Never"
+ }
+ }
+ }
+ }
+ }
+}
+
+# Clean up evicted/failed pods cluster-wide daily
+resource "kubernetes_cron_job_v1" "cleanup-failed-pods" {
+ metadata {
+ name = "cleanup-failed-pods"
+ namespace = "default"
+ }
+ spec {
+ schedule = "0 2 * * *"
+ successful_jobs_history_limit = 1
+ failed_jobs_history_limit = 1
+ concurrency_policy = "Forbid"
+ job_template {
+ metadata {
+ name = "cleanup-failed-pods"
+ }
+ spec {
+ template {
+ metadata {
+ name = "cleanup-failed-pods"
+ }
+ spec {
+ service_account_name = kubernetes_service_account.cleanup_sa.metadata[0].name
+ container {
+ name = "cleanup"
+ image = "bitnami/kubectl:latest"
+ command = ["/bin/sh", "-c", "kubectl delete pods -A --field-selector=status.phase=Failed --ignore-not-found"]
+ }
+ restart_policy = "Never"
+ }
+ }
+ }
+ }
+ }
+}
+
+resource "kubernetes_service_account" "cleanup_sa" {
+ metadata {
+ name = "failed-pod-cleanup"
+ namespace = "default"
+ }
+}
+
+resource "kubernetes_cluster_role" "cleanup_role" {
+ metadata {
+ name = "failed-pod-cleanup"
+ }
+ rule {
+ api_groups = [""]
+ resources = ["pods"]
+ verbs = ["list", "delete"]
+ }
+}
+
+resource "kubernetes_cluster_role_binding" "cleanup_binding" {
+ metadata {
+ name = "failed-pod-cleanup"
+ }
+ role_ref {
+ api_group = "rbac.authorization.k8s.io"
+ kind = "ClusterRole"
+ name = kubernetes_cluster_role.cleanup_role.metadata[0].name
+ }
+ subject {
+ kind = "ServiceAccount"
+ name = kubernetes_service_account.cleanup_sa.metadata[0].name
+ namespace = "default"
+ }
+}
diff --git a/stacks/infra-maintenance/secrets b/stacks/infra-maintenance/secrets
new file mode 120000
index 00000000..ca54a7cf
--- /dev/null
+++ b/stacks/infra-maintenance/secrets
@@ -0,0 +1 @@
+../../secrets
\ No newline at end of file
diff --git a/stacks/infra-maintenance/terragrunt.hcl b/stacks/infra-maintenance/terragrunt.hcl
new file mode 100644
index 00000000..4f16dddf
--- /dev/null
+++ b/stacks/infra-maintenance/terragrunt.hcl
@@ -0,0 +1,8 @@
+include "root" {
+ path = find_in_parent_folders()
+}
+
+dependency "infra" {
+ config_path = "../infra"
+ skip_outputs = true
+}
diff --git a/stacks/infra-maintenance/tiers.tf b/stacks/infra-maintenance/tiers.tf
new file mode 100644
index 00000000..eb0f8083
--- /dev/null
+++ b/stacks/infra-maintenance/tiers.tf
@@ -0,0 +1,10 @@
+# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa
+locals {
+ tiers = {
+ core = "0-core"
+ cluster = "1-cluster"
+ gpu = "2-gpu"
+ edge = "3-edge"
+ aux = "4-aux"
+ }
+}
diff --git a/stacks/iscsi-csi/main.tf b/stacks/iscsi-csi/main.tf
new file mode 100644
index 00000000..0cbc798d
--- /dev/null
+++ b/stacks/iscsi-csi/main.tf
@@ -0,0 +1,14 @@
+variable "nfs_server" { type = string }
+
+data "vault_kv_secret_v2" "secrets" {
+ mount = "secret"
+ name = "platform"
+}
+
+module "iscsi-csi" {
+ source = "./modules/iscsi-csi"
+ tier = local.tiers.cluster
+ truenas_host = var.nfs_server
+ truenas_api_key = data.vault_kv_secret_v2.secrets.data["truenas_api_key"]
+ truenas_ssh_private_key = data.vault_kv_secret_v2.secrets.data["truenas_ssh_private_key"]
+}
diff --git a/stacks/iscsi-csi/modules/iscsi-csi/main.tf b/stacks/iscsi-csi/modules/iscsi-csi/main.tf
new file mode 100644
index 00000000..4c4a8d59
--- /dev/null
+++ b/stacks/iscsi-csi/modules/iscsi-csi/main.tf
@@ -0,0 +1,148 @@
+resource "kubernetes_namespace" "iscsi_csi" {
+ metadata {
+ name = "iscsi-csi"
+ labels = {
+ tier = var.tier
+ "resource-governance/custom-quota" = "true"
+ }
+ }
+}
+
+resource "helm_release" "democratic_csi" {
+ namespace = kubernetes_namespace.iscsi_csi.metadata[0].name
+ create_namespace = false
+ name = "democratic-csi-iscsi"
+ atomic = true
+ timeout = 300
+
+ repository = "https://democratic-csi.github.io/charts/"
+ chart = "democratic-csi"
+
+ values = [yamlencode({
+ csiDriver = {
+ name = "org.democratic-csi.iscsi"
+ }
+
+ storageClasses = [{
+ name = "iscsi-truenas"
+ defaultClass = false
+ reclaimPolicy = "Retain"
+ volumeBindingMode = "Immediate"
+ allowVolumeExpansion = true
+ parameters = {
+ fsType = "ext4"
+ }
+ mountOptions = []
+ }]
+
+ controller = {
+ replicas = 2
+ driver = {
+ resources = {
+ requests = { cpu = "25m", memory = "192Mi" }
+ limits = { memory = "192Mi" }
+ }
+ }
+ externalProvisioner = {
+ resources = {
+ requests = { cpu = "5m", memory = "64Mi" }
+ limits = { memory = "64Mi" }
+ }
+ }
+ externalAttacher = {
+ resources = {
+ requests = { cpu = "5m", memory = "64Mi" }
+ limits = { memory = "64Mi" }
+ }
+ }
+ externalResizer = {
+ resources = {
+ requests = { cpu = "5m", memory = "64Mi" }
+ limits = { memory = "64Mi" }
+ }
+ }
+ externalSnapshotter = {
+ resources = {
+ requests = { cpu = "5m", memory = "80Mi" }
+ limits = { memory = "80Mi" }
+ }
+ }
+ }
+
+ # csiProxy is a top-level chart key, NOT nested under controller/node
+ csiProxy = {
+ resources = {
+ requests = { cpu = "5m", memory = "32Mi" }
+ limits = { memory = "32Mi" }
+ }
+ }
+
+ node = {
+ driver = {
+ resources = {
+ requests = { cpu = "25m", memory = "192Mi" }
+ limits = { memory = "192Mi" }
+ }
+ }
+ driverRegistrar = {
+ resources = {
+ requests = { cpu = "5m", memory = "32Mi" }
+ limits = { memory = "32Mi" }
+ }
+ }
+ cleanup = {
+ resources = {
+ requests = { cpu = "5m", memory = "32Mi" }
+ limits = { memory = "32Mi" }
+ }
+ }
+
+ hostPID = true
+ hostPath = "/lib/modules"
+ }
+
+ driver = {
+ config = {
+ driver = "freenas-iscsi"
+
+ instance_id = "truenas-iscsi"
+
+ httpConnection = {
+ protocol = "http"
+ host = var.truenas_host
+ port = 80
+ apiKey = var.truenas_api_key
+ }
+
+ sshConnection = {
+ host = var.truenas_host
+ port = 22
+ username = "root"
+ privateKey = var.truenas_ssh_private_key
+ }
+
+ zfs = {
+ datasetParentName = "main/iscsi"
+ detachedSnapshotsDatasetParentName = "main/iscsi-snaps"
+ }
+
+ iscsi = {
+ targetPortal = "${var.truenas_host}:3260"
+ namePrefix = "csi-"
+ nameSuffix = ""
+ targetGroups = [{
+ targetGroupPortalGroup = 1
+ targetGroupInitiatorGroup = 1
+ targetGroupAuthType = "None"
+ }]
+ extentInsecureTpc = true
+ extentXenCompat = false
+ extentDisablePhysicalBlocksize = true
+ extentBlocksize = 512
+ extentRpm = "SSD"
+ extentAvailThreshold = 0
+ }
+ }
+ }
+ })]
+}
diff --git a/stacks/iscsi-csi/modules/iscsi-csi/variables.tf b/stacks/iscsi-csi/modules/iscsi-csi/variables.tf
new file mode 100644
index 00000000..893fe396
--- /dev/null
+++ b/stacks/iscsi-csi/modules/iscsi-csi/variables.tf
@@ -0,0 +1,10 @@
+variable "tier" { type = string }
+variable "truenas_host" { type = string }
+variable "truenas_api_key" {
+ type = string
+ sensitive = true
+}
+variable "truenas_ssh_private_key" {
+ type = string
+ sensitive = true
+}
diff --git a/stacks/iscsi-csi/secrets b/stacks/iscsi-csi/secrets
new file mode 120000
index 00000000..ca54a7cf
--- /dev/null
+++ b/stacks/iscsi-csi/secrets
@@ -0,0 +1 @@
+../../secrets
\ No newline at end of file
diff --git a/stacks/iscsi-csi/terragrunt.hcl b/stacks/iscsi-csi/terragrunt.hcl
new file mode 100644
index 00000000..4f16dddf
--- /dev/null
+++ b/stacks/iscsi-csi/terragrunt.hcl
@@ -0,0 +1,8 @@
+include "root" {
+ path = find_in_parent_folders()
+}
+
+dependency "infra" {
+ config_path = "../infra"
+ skip_outputs = true
+}
diff --git a/stacks/iscsi-csi/tiers.tf b/stacks/iscsi-csi/tiers.tf
new file mode 100644
index 00000000..eb0f8083
--- /dev/null
+++ b/stacks/iscsi-csi/tiers.tf
@@ -0,0 +1,10 @@
+# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa
+locals {
+ tiers = {
+ core = "0-core"
+ cluster = "1-cluster"
+ gpu = "2-gpu"
+ edge = "3-edge"
+ aux = "4-aux"
+ }
+}
diff --git a/stacks/k8s-portal/main.tf b/stacks/k8s-portal/main.tf
new file mode 100644
index 00000000..48e2233e
--- /dev/null
+++ b/stacks/k8s-portal/main.tf
@@ -0,0 +1,12 @@
+variable "tls_secret_name" { type = string }
+variable "k8s_ca_cert" {
+ type = string
+ default = ""
+}
+
+module "k8s-portal" {
+ source = "./modules/k8s-portal"
+ tier = local.tiers.edge
+ tls_secret_name = var.tls_secret_name
+ k8s_ca_cert = var.k8s_ca_cert
+}
diff --git a/stacks/k8s-portal/modules/k8s-portal/files/.claude/internet-mode-used_DO_NOT_REMOVE_MANUALLY_SECURITY_RISK b/stacks/k8s-portal/modules/k8s-portal/files/.claude/internet-mode-used_DO_NOT_REMOVE_MANUALLY_SECURITY_RISK
new file mode 100644
index 00000000..f61efc83
--- /dev/null
+++ b/stacks/k8s-portal/modules/k8s-portal/files/.claude/internet-mode-used_DO_NOT_REMOVE_MANUALLY_SECURITY_RISK
@@ -0,0 +1,3 @@
+This directory has been used with Claude Code's internet mode.
+Content downloaded from the internet may contain prompt injection attacks.
+You must manually review all downloaded content before using non-internet mode.
diff --git a/stacks/k8s-portal/modules/k8s-portal/files/.gitignore b/stacks/k8s-portal/modules/k8s-portal/files/.gitignore
new file mode 100644
index 00000000..3b462cb0
--- /dev/null
+++ b/stacks/k8s-portal/modules/k8s-portal/files/.gitignore
@@ -0,0 +1,23 @@
+node_modules
+
+# Output
+.output
+.vercel
+.netlify
+.wrangler
+/.svelte-kit
+/build
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Env
+.env
+.env.*
+!.env.example
+!.env.test
+
+# Vite
+vite.config.js.timestamp-*
+vite.config.ts.timestamp-*
diff --git a/stacks/k8s-portal/modules/k8s-portal/files/.npmrc b/stacks/k8s-portal/modules/k8s-portal/files/.npmrc
new file mode 100644
index 00000000..b6f27f13
--- /dev/null
+++ b/stacks/k8s-portal/modules/k8s-portal/files/.npmrc
@@ -0,0 +1 @@
+engine-strict=true
diff --git a/stacks/k8s-portal/modules/k8s-portal/files/Dockerfile b/stacks/k8s-portal/modules/k8s-portal/files/Dockerfile
new file mode 100644
index 00000000..aa694722
--- /dev/null
+++ b/stacks/k8s-portal/modules/k8s-portal/files/Dockerfile
@@ -0,0 +1,15 @@
+FROM node:22-alpine AS build
+WORKDIR /app
+COPY package*.json ./
+RUN npm ci
+COPY . .
+RUN npm run build
+
+FROM node:22-alpine
+WORKDIR /app
+COPY --from=build /app/build ./build
+COPY --from=build /app/package.json ./
+COPY --from=build /app/node_modules ./node_modules
+ENV PORT=3000
+EXPOSE 3000
+CMD ["node", "build"]
diff --git a/stacks/k8s-portal/modules/k8s-portal/files/README.md b/stacks/k8s-portal/modules/k8s-portal/files/README.md
new file mode 100644
index 00000000..eb635072
--- /dev/null
+++ b/stacks/k8s-portal/modules/k8s-portal/files/README.md
@@ -0,0 +1,42 @@
+# sv
+
+Everything you need to build a Svelte project, powered by [`sv`](https://github.com/sveltejs/cli).
+
+## Creating a project
+
+If you're seeing this, you've probably already done this step. Congrats!
+
+```sh
+# create a new project
+npx sv create my-app
+```
+
+To recreate this project with the same configuration:
+
+```sh
+# recreate this project
+npx sv create --template minimal --types ts --install npm .
+```
+
+## Developing
+
+Once you've created a project and installed dependencies with `npm install` (or `pnpm install` or `yarn`), start a development server:
+
+```sh
+npm run dev
+
+# or start the server and open the app in a new browser tab
+npm run dev -- --open
+```
+
+## Building
+
+To create a production version of your app:
+
+```sh
+npm run build
+```
+
+You can preview the production build with `npm run preview`.
+
+> To deploy your app, you may need to install an [adapter](https://svelte.dev/docs/kit/adapters) for your target environment.
diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/app.d.ts b/stacks/k8s-portal/modules/k8s-portal/files/src/app.d.ts
new file mode 100644
index 00000000..da08e6da
--- /dev/null
+++ b/stacks/k8s-portal/modules/k8s-portal/files/src/app.d.ts
@@ -0,0 +1,13 @@
+// See https://svelte.dev/docs/kit/types#app.d.ts
+// for information about these interfaces
+declare global {
+ namespace App {
+ // interface Error {}
+ // interface Locals {}
+ // interface PageData {}
+ // interface PageState {}
+ // interface Platform {}
+ }
+}
+
+export {};
diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/app.html b/stacks/k8s-portal/modules/k8s-portal/files/src/app.html
new file mode 100644
index 00000000..f273cc58
--- /dev/null
+++ b/stacks/k8s-portal/modules/k8s-portal/files/src/app.html
@@ -0,0 +1,11 @@
+
+
+
+
+
+ %sveltekit.head%
+
+
+ %sveltekit.body%
+
+
diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/lib/assets/favicon.svg b/stacks/k8s-portal/modules/k8s-portal/files/src/lib/assets/favicon.svg
new file mode 100644
index 00000000..cc5dc66a
--- /dev/null
+++ b/stacks/k8s-portal/modules/k8s-portal/files/src/lib/assets/favicon.svg
@@ -0,0 +1 @@
+svelte-logo
\ No newline at end of file
diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/lib/index.ts b/stacks/k8s-portal/modules/k8s-portal/files/src/lib/index.ts
new file mode 100644
index 00000000..856f2b6c
--- /dev/null
+++ b/stacks/k8s-portal/modules/k8s-portal/files/src/lib/index.ts
@@ -0,0 +1 @@
+// place files you want to import through the `$lib` alias in this folder.
diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/+layout.svelte b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/+layout.svelte
new file mode 100644
index 00000000..d412c4d6
--- /dev/null
+++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/+layout.svelte
@@ -0,0 +1,64 @@
+
+
+
+
+
+
+
+
+
+
+{@render children()}
+
+
diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/+page.server.ts b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/+page.server.ts
new file mode 100644
index 00000000..cc532664
--- /dev/null
+++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/+page.server.ts
@@ -0,0 +1,33 @@
+import type { PageServerLoad } from './$types';
+import { readFileSync } from 'fs';
+
+interface UserRole {
+ role: string;
+ namespaces: string[];
+}
+
+export const load: PageServerLoad = async ({ request }) => {
+ const email = request.headers.get('x-authentik-email') || 'unknown';
+ const username = request.headers.get('x-authentik-username') || 'unknown';
+ const groups = request.headers.get('x-authentik-groups') || '';
+
+ // Read user roles from ConfigMap-mounted file
+ let userRole: UserRole = { role: 'unknown', namespaces: [] };
+ try {
+ const usersJson = readFileSync('/config/users.json', 'utf-8');
+ const users = JSON.parse(usersJson);
+ if (users[email]) {
+ userRole = users[email];
+ }
+ } catch {
+ // ConfigMap not mounted or parse error
+ }
+
+ return {
+ email,
+ username,
+ groups: groups.split('|').filter(Boolean),
+ role: userRole.role,
+ namespaces: userRole.namespaces
+ };
+};
diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/+page.svelte b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/+page.svelte
new file mode 100644
index 00000000..2d13fa39
--- /dev/null
+++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/+page.svelte
@@ -0,0 +1,102 @@
+
+
+
+ Kubernetes Access Portal
+
+
+
VPN Required — The cluster is on a private network. You need Headscale VPN access before kubectl will work.
+
See the Getting Started guide for VPN setup instructions.
+
+
+
+ Your Identity
+ Username: {data.username}
+ Email: {data.email}
+ Role: {data.role}
+ {#if data.namespaces.length > 0}
+ Namespaces: {data.namespaces.join(', ')}
+ {/if}
+
+
+ {#if data.role === 'namespace-owner'}
+
+ Your Namespace
+ Assigned namespaces: {data.namespaces.join(', ')}
+
+ Quick Commands
+
+# Check your pods
+kubectl get pods -n {data.namespaces[0]}
+
+# View quota usage
+kubectl describe resourcequota -n {data.namespaces[0]}
+
+# Log into Vault
+vault login -method=oidc
+
+# Store a secret
+vault kv put secret/{data.username}/myapp KEY=value
+
+# Get K8s deploy token
+vault write kubernetes/creds/{data.namespaces[0]}-deployer \
+ kubernetes_namespace={data.namespaces[0]}
+
+ {/if}
+
+
+
+
+
+
+
diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/agent/+page.svelte b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/agent/+page.svelte
new file mode 100644
index 00000000..bf7f3f3a
--- /dev/null
+++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/agent/+page.svelte
@@ -0,0 +1,61 @@
+
+ Agent Bootstrap
+ Point any AI coding agent at this cluster and it can bootstrap itself automatically.
+
+
+ For AI Agents
+ Fetch the machine-readable bootstrap document:
+ curl -fsSL https://k8s-portal.viktorbarzin.me/agent
+ This returns a plain-text markdown document with everything an agent needs: setup commands, critical rules, secrets workflow, Terraform conventions, key file paths, and common operations.
+
+
+
+ Usage with Claude Code
+ claude "$(curl -fsSL https://k8s-portal.viktorbarzin.me/agent)" "Deploy a new echo service"
+ Or within a session:
+
+ Clone the repo: git clone https://github.com/ViktorBarzin/infra.git && cd infra
+ Start Claude Code: claude
+ Claude auto-reads AGENTS.md and .claude/CLAUDE.md from the repo
+
+
+
+
+ Usage with Codex / Other Agents
+
+ Clone the repo and cd into it
+ Run the setup script: bash <(curl -fsSL https://k8s-portal.viktorbarzin.me/setup/script?os=linux)
+ Start the agent — it will read AGENTS.md for instructions
+
+ If the agent doesn't auto-read AGENTS.md, feed it the bootstrap doc:
+ curl -fsSL https://k8s-portal.viktorbarzin.me/agent
+
+
+
+ What the Agent Gets
+
+ Quick-start commands (setup script, repo clone)
+ Critical rules (no kubectl apply, no plaintext secrets, no NFS restart)
+ Sealed Secrets workflow (kubeseal self-service)
+ Terraform conventions (fileset pattern, tiers, storage, shared vars)
+ Key file paths in the repo
+ Common operations (deploy, fix pods, add secrets)
+ Contributing workflow (branch, PR, review, CI)
+
+
+
+
+ The /agent Endpoint
+ The endpoint is unauthenticated — no login required. Agents can curl or WebFetch it directly without a browser session, just like the setup script.
+ Content-Type: text/plain — no HTML parsing needed.
+
+
+
+
diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/agent/+server.ts b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/agent/+server.ts
new file mode 100644
index 00000000..3d0fa891
--- /dev/null
+++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/agent/+server.ts
@@ -0,0 +1,161 @@
+import type { RequestHandler } from './$types';
+
+const BOOTSTRAP_DOC = `# Infrastructure Cluster — AI Agent Bootstrap
+
+> Fetch this document: \`curl -fsSL https://k8s-portal.viktorbarzin.me/agent\`
+
+## Quick Start
+
+\`\`\`bash
+# 1. Install tools (kubectl, kubelogin, kubeseal)
+bash <(curl -fsSL https://k8s-portal.viktorbarzin.me/setup/script?os=linux)
+
+# 2. Clone the infrastructure repo
+git clone https://github.com/ViktorBarzin/infra.git && cd infra
+
+# 3. Verify cluster access (opens browser for OIDC login on first run)
+kubectl get namespaces
+\`\`\`
+
+## Critical Rules (MUST FOLLOW)
+
+- **ALL changes through Terraform/Terragrunt** — NEVER \`kubectl apply/edit/patch/delete\` for persistent changes. Read-only kubectl is fine.
+- **NEVER put secrets in plaintext** — use Sealed Secrets (\`kubeseal\`) or \`secrets.sops.json\` (SOPS-encrypted).
+- **NEVER restart NFS on TrueNAS** — causes cluster-wide mount failures across all pods.
+- **NEVER commit secrets** — triple-check before every commit.
+- **\`[ci skip]\` in commit messages** when changes were already applied locally.
+- **Ask before \`git push\`** — always confirm with the user first.
+
+## Sealed Secrets (Self-Service)
+
+You can manage your own secrets without SOPS access using \`kubeseal\`:
+
+\`\`\`bash
+# 1. Create a sealed secret
+kubectl create secret generic \\
+ --from-literal=key=value -n \\
+ --dry-run=client -o yaml | \\
+ kubeseal --controller-name sealed-secrets \\
+ --controller-namespace sealed-secrets -o yaml > sealed-.yaml
+
+# 2. Place the file in the stack directory: stacks//sealed-.yaml
+
+# 3. Ensure the stack's main.tf has the fileset block (add if missing):
+\`\`\`
+
+\`\`\`hcl
+resource "kubernetes_manifest" "sealed_secrets" {
+ for_each = fileset(path.module, "sealed-*.yaml")
+ manifest = yamldecode(file("\${path.module}/\${each.value}"))
+}
+\`\`\`
+
+\`\`\`bash
+# 4. Push to PR — CI runs terragrunt apply — controller decrypts into real K8s Secrets
+\`\`\`
+
+- Files MUST match the \`sealed-*.yaml\` glob pattern.
+- Only the in-cluster controller has the private key. \`kubeseal\` uses the public key — safe to distribute.
+- The \`kubernetes_manifest\` block is safe to add even with zero sealed-*.yaml files (empty for_each).
+
+## SOPS Secrets (Admin-Only Fallback)
+
+For secrets requiring admin access (shared infra passwords, API keys):
+- **\`secrets.sops.json\`** — SOPS-encrypted secrets (JSON format)
+- **Edit**: \`sops secrets.sops.json\` (opens $EDITOR, re-encrypts on save)
+- **Add**: \`sops set secrets.sops.json '["new_key"]' '"value"'\`
+- **Operators without SOPS keys**: comment on your PR asking Viktor to add the secret.
+
+## Terraform Conventions
+
+### Execution
+- **Apply a service**: \`scripts/tg apply --non-interactive\` (auto-decrypts SOPS secrets)
+- **Plan**: \`scripts/tg plan --non-interactive\`
+- **kubectl**: \`kubectl --kubeconfig $(pwd)/config\`
+- **Health check**: \`bash scripts/cluster_healthcheck.sh --quiet\`
+
+### Key Paths
+| Path | Purpose |
+|------|---------|
+| \`stacks//main.tf\` | Service definition |
+| \`stacks/platform/modules//\` | Core infra modules (~22) |
+| \`modules/kubernetes/ingress_factory/\` | Standardized ingress (auth, rate limiting, anti-AI) |
+| \`modules/kubernetes/nfs_volume/\` | NFS volume module (CSI-backed, soft mount) |
+| \`config.tfvars\` | Non-secret configuration (plaintext) |
+| \`secrets.sops.json\` | All secrets (SOPS-encrypted JSON) |
+| \`scripts/cluster_healthcheck.sh\` | 25-check cluster health script |
+| \`AGENTS.md\` | Full AI agent instructions (auto-loaded by most agents) |
+
+### Tier System
+\`0-core\` | \`1-cluster\` | \`2-gpu\` | \`3-edge\` | \`4-aux\`
+
+Kyverno auto-generates LimitRange + ResourceQuota per namespace based on tier label.
+- Containers without explicit \`resources {}\` get default limits (256Mi for edge/aux — causes OOMKill for heavy apps)
+- Always set explicit resources on containers that need more than defaults
+- Opt-out labels: \`resource-governance/custom-quota=true\` / \`resource-governance/custom-limitrange=true\`
+
+### Storage
+- **NFS** (\`nfs-truenas\` StorageClass): For app data. Use the \`nfs_volume\` module.
+- **iSCSI** (\`iscsi-truenas\` StorageClass): For databases (PostgreSQL, MySQL).
+
+### Shared Variables (never hardcode)
+\`var.nfs_server\`, \`var.redis_host\`, \`var.postgresql_host\`, \`var.mysql_host\`, \`var.ollama_host\`, \`var.mail_host\`
+
+## Architecture
+
+- Terragrunt-based homelab managing a Kubernetes cluster (5 nodes, v1.34.2) on Proxmox VMs
+- 70+ services, each in \`stacks//\` with its own Terraform state
+- Core platform: \`stacks/platform/modules/\` (Traefik, Kyverno, monitoring, dbaas, sealed-secrets, etc.)
+- Public domain: \`viktorbarzin.me\` (Cloudflare) | Internal: \`viktorbarzin.lan\` (Technitium DNS)
+- CI/CD: Woodpecker CI — PRs run plan, merges to master auto-apply all stacks in parallel
+
+## Common Operations
+
+### Deploy a New Service
+1. Copy an existing stack as template: \`cp -r stacks/echo stacks/my-service\`
+2. Edit \`main.tf\` — update image, ports, ingress, resources
+3. Add DNS in \`config.tfvars\`
+4. Apply platform first if needed, then the service
+
+### Fix Crashed Pods
+1. Run \`bash scripts/cluster_healthcheck.sh --quiet\`
+2. Safe to delete evicted/failed pods and CrashLoopBackOff pods with >10 restarts
+3. OOMKilled? Check \`kubectl describe limitrange tier-defaults -n \` and increase \`resources.limits.memory\`
+
+### Add a Secret
+- **Self-service**: Use \`kubeseal\` (see Sealed Secrets section above)
+- **Admin**: \`sops set secrets.sops.json '["key"]' '"value"'\` then commit
+
+## Contributing Workflow
+
+1. Create a branch: \`git checkout -b fix/my-change\`
+2. Make changes in \`stacks//main.tf\`
+3. Push and open a PR: \`git push -u origin fix/my-change\`
+4. Viktor reviews and merges
+5. CI applies automatically — Slack notification when done
+
+## Infrastructure Details
+
+- **Proxmox**: 192.168.1.127 (Dell R730, 22c/44t, 142GB RAM)
+- **Nodes**: k8s-master (10.0.20.100), node1 (GPU, Tesla T4), node2-4
+- **GPU workloads**: \`node_selector = { "gpu": "true" }\` + toleration \`nvidia.com/gpu\`
+- **Pull-through cache**: 10.0.20.10 — use versioned image tags (cache serves stale :latest manifests)
+- **MySQL InnoDB Cluster**: 3 instances on iSCSI
+- **SMTP**: \`var.mail_host\` port 587 STARTTLS
+
+## Further Reading
+
+- Full agent instructions: \`AGENTS.md\` in the repo root
+- Patterns and examples: \`.claude/reference/patterns.md\`
+- Service catalog: \`.claude/reference/service-catalog.md\`
+- Onboarding guide: https://k8s-portal.viktorbarzin.me/onboarding
+`;
+
+export const GET: RequestHandler = async () => {
+ return new Response(BOOTSTRAP_DOC, {
+ headers: {
+ 'Content-Type': 'text/plain; charset=utf-8',
+ 'Cache-Control': 'public, max-age=3600'
+ }
+ });
+};
diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/architecture/+page.svelte b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/architecture/+page.svelte
new file mode 100644
index 00000000..2790f074
--- /dev/null
+++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/architecture/+page.svelte
@@ -0,0 +1,75 @@
+
+ Architecture
+
+
+ Overview
+ The infrastructure runs on a single Dell R730 server (22 CPU cores, 142GB RAM) using Proxmox to manage virtual machines. Five of those VMs form a Kubernetes cluster that runs 70+ services.
+
+Proxmox (Dell R730)
+ ├── k8s-master (10.0.20.100) — control plane
+ ├── k8s-node1 (10.0.20.101) — GPU node (Tesla T4)
+ ├── k8s-node2 (10.0.20.102) — worker
+ ├── k8s-node3 (10.0.20.103) — worker
+ ├── k8s-node4 (10.0.20.104) — worker
+ ├── TrueNAS (10.0.10.15) — storage (NFS + iSCSI)
+ └── pfSense (10.0.20.1) — firewall + gateway
+
+
+
+ Networking
+
+ Public domain : viktorbarzin.me — managed by Cloudflare
+ Internal domain : viktorbarzin.lan — managed by Technitium DNS
+ Ingress : Cloudflare → Traefik → services
+ VPN : Headscale (self-hosted Tailscale)
+
+
+
+
+ Storage
+
+ NFS (nfs-truenas) — for app data (files, configs, media). Stored on TrueNAS.
+ iSCSI (iscsi-truenas) — for databases (PostgreSQL, MySQL). Block storage.
+
+
+
+
+ Service Tiers
+ Services are organized into tiers that control resource limits and restart priority:
+
+ Tier Examples Priority
+
+ 0-core Traefik, DNS, VPN, Auth Highest — never evicted
+ 1-cluster Redis, Prometheus, CrowdSec High
+ 2-gpu Ollama, Immich ML, Whisper Medium
+ 3-edge Nextcloud, Paperless, Grafana Normal
+ 4-aux Dashy, PrivateBin, CyberChef Low — evicted first under pressure
+
+
+
+
+
+ Infrastructure as Code
+ Everything is managed with Terraform (via Terragrunt ). Each service has its own stack:
+ stacks/
+ ├── platform/ ← core infra (22 modules)
+ ├── url/ ← URL shortener (Shlink)
+ ├── immich/ ← photo library
+ ├── nextcloud/ ← file storage
+ └── ... (70+ more)
+ Changes go through git: branch → PR → review → merge → CI applies automatically.
+
+
+
+
diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/contributing/+page.svelte b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/contributing/+page.svelte
new file mode 100644
index 00000000..2375a2cb
--- /dev/null
+++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/contributing/+page.svelte
@@ -0,0 +1,115 @@
+
+ How to Contribute
+
+
+ Workflow
+
+ Create a branch : git checkout -b fix/my-change
+ Make your changes in stacks/<service>/main.tf
+ Push and open a PR : git push -u origin fix/my-change
+ Viktor reviews and merges
+ CI applies automatically — Slack notification when done
+
+
+
+
+ What you CAN change
+
+ Service configurations (image tags, environment variables, resource limits)
+ New services (add a new stack under stacks/)
+ Ingress routes, health probes, replica counts
+
+
+
+
+ What needs Viktor's review
+
+ CI pipeline changes (.woodpecker/)
+ Terragrunt configuration (terragrunt.hcl)
+ Secrets configuration (.sops.yaml)
+ Core platform modules (stacks/platform/)
+
+
+
+
+
+
+
+ Never kubectl apply/edit/patch — all changes go through Terraform
 Never put secrets in code — use Sealed Secrets (kubeseal) for your own apps, or ask Viktor to add shared secrets to the encrypted secrets file
+ Never restart NFS on TrueNAS — causes cluster-wide mount failures
+ Never push directly to master — always use a PR
+
+
+
+
+
+ Need a new secret?
+ Comment on your PR: "I need a database password for my-service." Viktor will add it to the encrypted secrets file and push to your branch.
+ Then reference it in your Terraform: var.my_service_db_password
+
+
+
+ Namespace Owner Workflow
+ If you are a namespace owner, you can deploy your own apps:
+
+ Clone the infra repo: git clone https://github.com/ViktorBarzin/infra.git
+ Copy the template: cp -r stacks/_template stacks/your-app
+ Rename: mv stacks/your-app/main.tf.example stacks/your-app/main.tf
+ Edit main.tf — replace all <placeholders>
+ Store secrets in Vault: vault kv put secret/your-username/your-app KEY=value
+ Add your app domain to your domains list in Vault KV
+ Submit a PR, get it reviewed
+ After merge, admin runs terragrunt apply
+
+
+
+
+ CI Pipeline Template
+ Create a .woodpecker.yml in your app's Forgejo repo:
+ {`steps:
+ - name: build
+ image: woodpeckerci/plugin-docker-buildx
+ settings:
+ repo: your-dockerhub-user/myapp
+ tag: ["\${CI_PIPELINE_NUMBER}", "latest"]
+ username:
+ from_secret: dockerhub-username
+ password:
+ from_secret: dockerhub-token
+ platforms: linux/amd64
+
+ - name: deploy
+ image: hashicorp/vault:1.18.1
+ commands:
+ - export VAULT_ADDR=http://vault-active.vault.svc.cluster.local:8200
+ - export VAULT_TOKEN=$(vault write -field=token auth/kubernetes/login
+ role=ci jwt=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token))
+ - KUBE_TOKEN=$(vault write -field=service_account_token
+ kubernetes/creds/YOUR_NAMESPACE-deployer
+ kubernetes_namespace=YOUR_NAMESPACE)
+ - kubectl --server=https://kubernetes.default.svc
+ --token=$KUBE_TOKEN
+ --certificate-authority=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+ -n YOUR_NAMESPACE set image deployment/myapp
+ myapp=your-dockerhub-user/myapp:\${CI_PIPELINE_NUMBER}`}
+
+
+
+ Need a secret for your app?
+ As a namespace owner, you manage your own secrets in Vault:
+ vault kv put secret/your-username/your-app DB_PASSWORD=mysecret API_KEY=abc123
+ Then reference them in your Terraform using a data "vault_kv_secret_v2" block.
+
+
+
+
diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/download/+server.ts b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/download/+server.ts
new file mode 100644
index 00000000..28981156
--- /dev/null
+++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/download/+server.ts
@@ -0,0 +1,58 @@
+import type { RequestHandler } from './$types';
+import { readFileSync } from 'fs';
+
+const CLUSTER_SERVER = 'https://10.0.20.100:6443';
+const OIDC_ISSUER = 'https://authentik.viktorbarzin.me/application/o/kubernetes/';
+const OIDC_CLIENT_ID = 'kubernetes';
+
+export const GET: RequestHandler = async ({ request }) => {
+ const email = request.headers.get('x-authentik-email') || 'user';
+
+ // Read CA cert from mounted ConfigMap
+ let caCert = '';
+ try {
+ caCert = readFileSync('/config/ca.crt', 'utf-8');
+ } catch {
+ // CA cert not available
+ }
+
+ const caCertBase64 = Buffer.from(caCert).toString('base64');
+ const sanitizedEmail = email.replace(/[^a-zA-Z0-9@._-]/g, '');
+
+ const kubeconfig = `apiVersion: v1
+kind: Config
+clusters:
+- cluster:
+ server: ${CLUSTER_SERVER}
+ certificate-authority-data: ${caCertBase64}
+ name: home-cluster
+contexts:
+- context:
+ cluster: home-cluster
+ user: oidc-${sanitizedEmail}
+ name: home-cluster
+current-context: home-cluster
+users:
+- name: oidc-${sanitizedEmail}
+ user:
+ exec:
+ apiVersion: client.authentication.k8s.io/v1beta1
+ command: kubectl
+ args:
+ - oidc-login
+ - get-token
+ - --oidc-issuer-url=${OIDC_ISSUER}
+ - --oidc-client-id=${OIDC_CLIENT_ID}
+ - --oidc-extra-scope=email
+ - --oidc-extra-scope=profile
+ - --oidc-extra-scope=groups
+ interactiveMode: IfAvailable
+`;
+
+ return new Response(kubeconfig, {
+ headers: {
+ 'Content-Type': 'application/yaml',
+ 'Content-Disposition': `attachment; filename="kubeconfig-home-cluster.yaml"`
+ }
+ });
+};
diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/onboarding/+page.svelte b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/onboarding/+page.svelte
new file mode 100644
index 00000000..812ea24e
--- /dev/null
+++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/onboarding/+page.svelte
@@ -0,0 +1,146 @@
+
+
+
+ Getting Started
+ Welcome! Follow these steps to get access to the home Kubernetes cluster.
+
+
+
+
+ Step 0 — Join the VPN
+ The cluster is on a private network (10.0.20.0/24). You need VPN access first.
+
+ Install Tailscale for your OS
+ Run this in your terminal:
+ tailscale login --login-server https://headscale.viktorbarzin.me
+
+ A browser window will open with a registration URL
+ Send that URL to Viktor via email (vbarzin@gmail.com ) or Slack
+ Wait for approval (usually within a few hours)
+ Once approved, test: ping 10.0.20.100
+
+
+
+
+ Step 1 — Log in to the portal
+ Visit k8s-portal.viktorbarzin.me and sign in with your Authentik account.
+ If you don't have an account yet, ask Viktor to create one.
+
+
+
+ Step 2 — Set up kubectl
+ Run one of these commands in your terminal to install everything automatically:
+ macOS
+ Requires Homebrew . Install it first if you don't have it.
+ bash <(curl -fsSL https://k8s-portal.viktorbarzin.me/setup/script?os=mac)
+ Linux
+ bash <(curl -fsSL https://k8s-portal.viktorbarzin.me/setup/script?os=linux)
+ Windows
+ Use WSL2 and follow the Linux instructions.
+
+
+ {#if showNamespaceOwner}
+
+ Step 3 — Log into Vault
+ Vault manages your secrets and issues dynamic Kubernetes credentials.
+ vault login -method=oidc
+ This opens your browser for Authentik SSO. After login, your token is saved to ~/.vault-token.
+
+
+
+ Step 4 — Verify kubectl access
+ Run this command. It will open your browser for OIDC login the first time:
+ kubectl get pods -n YOUR_NAMESPACE
+ You should see an empty list (no resources) or your running pods.
+
+
+
+ Step 5 — Clone the infra repo
+ git clone https://github.com/ViktorBarzin/infra.git
+cd infra
+ This is where all the infrastructure configuration lives.
+
+
+
+ Step 6 — Create your first app stack
+
+ Copy the template: cp -r stacks/_template stacks/myapp
+mv stacks/myapp/main.tf.example stacks/myapp/main.tf
+ Edit stacks/myapp/main.tf — replace all <placeholders>
+ Store secrets in Vault:
+ vault kv put secret/YOUR_USERNAME/myapp DB_PASSWORD=secret123
+
+ Add your app domain to domains list in Vault KV k8s_users
+ Submit a PR:
+ git checkout -b feat/myapp
+git add stacks/myapp/
+git commit -m "add myapp stack"
+git push -u origin feat/myapp
+
+ Viktor reviews and merges
+ After merge: cd stacks/myapp && terragrunt apply
+
+
+ {:else}
+
+ Step 3 — Verify access
+ Run this command. It will open your browser for login the first time:
+ kubectl get namespaces
+ You should see output like:
+ NAME STATUS AGE
+default Active 200d
+kube-system Active 200d
+monitoring Active 200d
+...
+ If you get a connection error, make sure your VPN is connected (tailscale status).
+
+
+
+ Step 4 — Clone the repo
+ git clone https://github.com/ViktorBarzin/infra.git
+cd infra
+ This is where all the infrastructure configuration lives.
+
+
+
+ Step 5 — Install your AI assistant (optional)
+ Install Codex CLI for AI-assisted cluster management:
+ npm install -g @openai/codex
+ Codex reads the AGENTS.md file in the repo and knows how to work with the cluster.
+
+
+
+ Step 6 — Your first change
+
+ Create a branch: git checkout -b my-first-change
+ Edit a service file (e.g., change an image tag in stacks/echo/main.tf)
+ Commit and push: git add . && git commit -m "my first change" && git push -u origin my-first-change
+ Open a Pull Request on GitHub
+ Viktor reviews and merges
+ Woodpecker CI automatically applies the change to the cluster
+ Slack notification confirms it worked
+
+
+ {/if}
+
+
+
diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/services/+page.svelte b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/services/+page.svelte
new file mode 100644
index 00000000..e9648bde
--- /dev/null
+++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/services/+page.svelte
@@ -0,0 +1,58 @@
+
+ Service Catalog
+ 70+ services running on the cluster. Here are the most commonly used:
+
+
+
+
+ User-Facing Services
+
+
+
+
+
+
+
diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/setup/+page.svelte b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/setup/+page.svelte
new file mode 100644
index 00000000..520681d4
--- /dev/null
+++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/setup/+page.svelte
@@ -0,0 +1,69 @@
+
+ Setup Instructions
+
+
+ Quick Setup (one command)
+ Run this in your terminal to install everything and configure kubectl automatically:
+ macOS
+ bash <(curl -fsSL https://k8s-portal.viktorbarzin.me/setup/script?os=mac)
+ Linux
+ bash <(curl -fsSL https://k8s-portal.viktorbarzin.me/setup/script?os=linux)
+
+
+
+ Manual Setup
+
+ 1. Install kubectl
+ macOS
+ brew install kubectl
+ Linux
+ curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
+chmod +x kubectl && sudo mv kubectl /usr/local/bin/
+
+ 2. Install kubelogin (OIDC plugin)
+ macOS
+ brew install int128/kubelogin/kubelogin
+ Linux
+ curl -LO https://github.com/int128/kubelogin/releases/latest/download/kubelogin_linux_amd64.zip
+unzip kubelogin_linux_amd64.zip && sudo mv kubelogin /usr/local/bin/kubectl-oidc_login
+rm kubelogin_linux_amd64.zip
+
+ 3. Download and use your kubeconfig
+
+mkdir -p ~/.kube
+
+# Download from the portal (requires auth cookie from browser)
+# Or use the download button on the portal homepage
+
+# Set the KUBECONFIG environment variable
+export KUBECONFIG=~/.kube/config-home
+
+# Test access (opens browser for login)
+kubectl get namespaces
+
+
+
+ ← Back to portal
+
+
+
diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/setup/script/+server.ts b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/setup/script/+server.ts
new file mode 100644
index 00000000..82419194
--- /dev/null
+++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/setup/script/+server.ts
@@ -0,0 +1,266 @@
+import type { RequestHandler } from './$types';
+import { readFileSync } from 'fs';
+
+const CLUSTER_SERVER = 'https://10.0.20.100:6443';
+const OIDC_ISSUER = 'https://authentik.viktorbarzin.me/application/o/kubernetes/';
+const OIDC_CLIENT_ID = 'kubernetes';
+
+export const GET: RequestHandler = async ({ url }) => {
+ const os = url.searchParams.get('os') || 'mac';
+
+ let caCert = '';
+ try {
+ caCert = readFileSync('/config/ca.crt', 'utf-8');
+ } catch {
+ // CA cert not available
+ }
+ const caCertBase64 = Buffer.from(caCert).toString('base64');
+
+ const kubeconfigContent = `apiVersion: v1
+kind: Config
+clusters:
+- cluster:
+ server: ${CLUSTER_SERVER}
+ certificate-authority-data: ${caCertBase64}
+ name: home-cluster
+contexts:
+- context:
+ cluster: home-cluster
+ user: oidc-user
+ name: home-cluster
+current-context: home-cluster
+users:
+- name: oidc-user
+ user:
+ exec:
+ apiVersion: client.authentication.k8s.io/v1beta1
+ command: kubectl
+ args:
+ - oidc-login
+ - get-token
+ - --oidc-issuer-url=${OIDC_ISSUER}
+ - --oidc-client-id=${OIDC_CLIENT_ID}
+ - --oidc-extra-scope=email
+ - --oidc-extra-scope=profile
+ - --oidc-extra-scope=groups
+ interactiveMode: IfAvailable`;
+
+ let script: string;
+
+ if (os === 'linux') {
+ script = `#!/bin/bash
+set -e
+
+echo "=== Kubernetes Cluster Setup ==="
+echo ""
+
+# Use sudo if available, otherwise install directly (e.g. in containers running as root)
+SUDO=""
+if [ "$(id -u)" -ne 0 ] && command -v sudo &>/dev/null; then
+ SUDO="sudo"
+fi
+
+# Determine install directory
+INSTALL_DIR="/usr/local/bin"
+if [ ! -w "\$INSTALL_DIR" ] && [ -z "\$SUDO" ]; then
+ INSTALL_DIR="\$HOME/.local/bin"
+ mkdir -p "\$INSTALL_DIR"
+ export PATH="\$INSTALL_DIR:\$PATH"
+fi
+
+# Install kubectl
+if command -v kubectl &>/dev/null; then
+ echo "[OK] kubectl already installed"
+else
+ echo "[..] Installing kubectl..."
+ KUBECTL_VERSION=\$(curl -L -s https://dl.k8s.io/release/stable.txt)
+ curl -fsSLO "https://dl.k8s.io/release/\${KUBECTL_VERSION}/bin/linux/amd64/kubectl"
+ chmod +x kubectl && \$SUDO mv kubectl "\$INSTALL_DIR/"
+ echo "[OK] kubectl installed"
+fi
+
+# Install kubelogin
+if command -v kubectl-oidc_login &>/dev/null; then
+ echo "[OK] kubelogin already installed"
+else
+ echo "[..] Installing kubelogin..."
+ KUBELOGIN_VERSION=\$(curl -fsSL -o /dev/null -w "%{url_effective}" https://github.com/int128/kubelogin/releases/latest | grep -o '[^/]*\$')
+ curl -fsSLO "https://github.com/int128/kubelogin/releases/download/\${KUBELOGIN_VERSION}/kubelogin_linux_amd64.zip"
+ unzip -o kubelogin_linux_amd64.zip kubelogin -d /tmp
+ \$SUDO mv /tmp/kubelogin "\$INSTALL_DIR/kubectl-oidc_login"
+ rm -f kubelogin_linux_amd64.zip
+ echo "[OK] kubelogin installed"
+fi
+
+# Install kubeseal
+if command -v kubeseal &>/dev/null; then
+ echo "[OK] kubeseal already installed"
+else
+ echo "[..] Installing kubeseal..."
+ KUBESEAL_VERSION=\$(curl -fsSL -o /dev/null -w "%{url_effective}" https://github.com/bitnami-labs/sealed-secrets/releases/latest | grep -o '[^/]*\$')
+ curl -fsSLO "https://github.com/bitnami-labs/sealed-secrets/releases/download/\${KUBESEAL_VERSION}/kubeseal-\${KUBESEAL_VERSION#v}-linux-amd64.tar.gz"
+ tar -xzf "kubeseal-\${KUBESEAL_VERSION#v}-linux-amd64.tar.gz" kubeseal
+ \$SUDO mv kubeseal "\$INSTALL_DIR/"
+ rm -f "kubeseal-\${KUBESEAL_VERSION#v}-linux-amd64.tar.gz"
+ echo "[OK] kubeseal installed"
+fi
+
+# Install Vault CLI
+if command -v vault &>/dev/null; then
+ echo "[OK] vault already installed"
+else
+ echo "[..] Installing Vault CLI..."
+ VAULT_VERSION="1.18.1"
+ curl -fsSLO "https://releases.hashicorp.com/vault/\${VAULT_VERSION}/vault_\${VAULT_VERSION}_linux_amd64.zip"
+ unzip -o "vault_\${VAULT_VERSION}_linux_amd64.zip" vault -d /tmp
+ \$SUDO mv /tmp/vault "\$INSTALL_DIR/"
+ rm -f "vault_\${VAULT_VERSION}_linux_amd64.zip"
+ echo "[OK] vault installed"
+fi
+
+# Install Terragrunt
+if command -v terragrunt &>/dev/null; then
+ echo "[OK] terragrunt already installed"
+else
+ echo "[..] Installing terragrunt..."
+ TG_VERSION=\$(curl -fsSL -o /dev/null -w "%{url_effective}" https://github.com/gruntwork-io/terragrunt/releases/latest | grep -o '[^/]*\$')
+ curl -fsSLO "https://github.com/gruntwork-io/terragrunt/releases/download/\${TG_VERSION}/terragrunt_linux_amd64"
+ chmod +x terragrunt_linux_amd64
+ \$SUDO mv terragrunt_linux_amd64 "\$INSTALL_DIR/terragrunt"
+ echo "[OK] terragrunt installed"
+fi
+
+# Install Terraform
+if command -v terraform &>/dev/null; then
+ echo "[OK] terraform already installed"
+else
+ echo "[..] Installing terraform..."
+ TF_VERSION="1.9.8"
+ curl -fsSLO "https://releases.hashicorp.com/terraform/\${TF_VERSION}/terraform_\${TF_VERSION}_linux_amd64.zip"
+ unzip -o "terraform_\${TF_VERSION}_linux_amd64.zip" terraform -d /tmp
+ \$SUDO mv /tmp/terraform "\$INSTALL_DIR/"
+ rm -f "terraform_\${TF_VERSION}_linux_amd64.zip"
+ echo "[OK] terraform installed"
+fi
+
+# Write kubeconfig
+mkdir -p ~/.kube
+cat > ~/.kube/config-home << 'KUBECONFIG_EOF'
+${kubeconfigContent}
+KUBECONFIG_EOF
+echo "[OK] Kubeconfig written to ~/.kube/config-home"
+
+# Add KUBECONFIG to shell profile
+SHELL_RC=~/.bashrc
+[ -f ~/.zshrc ] && SHELL_RC=~/.zshrc
+if ! grep -q 'config-home' "\$SHELL_RC" 2>/dev/null; then
+ echo 'export KUBECONFIG=~/.kube/config-home' >> "\$SHELL_RC"
+ echo "[OK] Added KUBECONFIG to \$SHELL_RC"
+fi
+export KUBECONFIG=~/.kube/config-home
+
+echo ""
+echo "=== Setup complete! ==="
+echo ""
+echo "Run 'kubectl get namespaces' to test (opens browser for login)."
+echo "You may need to restart your shell or run: export KUBECONFIG=~/.kube/config-home"
+`;
+ } else {
+ script = `#!/bin/bash
+set -e
+
+echo "=== Kubernetes Cluster Setup ==="
+echo ""
+
+# Check for Homebrew
+if ! command -v brew &>/dev/null; then
+ echo "[!!] Homebrew not found. Install it first:"
+ echo ' /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"'
+ exit 1
+fi
+
+# Install kubectl
+if command -v kubectl &>/dev/null; then
+ echo "[OK] kubectl already installed ($(kubectl version --client -o json 2>/dev/null | grep -o '"gitVersion":"[^"]*"' | cut -d'"' -f4))"
+else
+ echo "[..] Installing kubectl..."
+ brew install kubectl
+ echo "[OK] kubectl installed"
+fi
+
+# Install kubelogin
+if command -v kubectl-oidc_login &>/dev/null; then
+ echo "[OK] kubelogin already installed"
+else
+ echo "[..] Installing kubelogin..."
+ brew install int128/kubelogin/kubelogin
+ echo "[OK] kubelogin installed"
+fi
+
+# Install kubeseal
+if command -v kubeseal &>/dev/null; then
+ echo "[OK] kubeseal already installed"
+else
+ echo "[..] Installing kubeseal..."
+ brew install kubeseal
+ echo "[OK] kubeseal installed"
+fi
+
+# Install Vault CLI
+if command -v vault &>/dev/null; then
+ echo "[OK] vault already installed"
+else
+ echo "[..] Installing Vault CLI..."
+ brew tap hashicorp/tap
+ brew install hashicorp/tap/vault
+ echo "[OK] vault installed"
+fi
+
+# Install Terragrunt
+if command -v terragrunt &>/dev/null; then
+ echo "[OK] terragrunt already installed"
+else
+ echo "[..] Installing terragrunt..."
+ brew install terragrunt
+ echo "[OK] terragrunt installed"
+fi
+
+# Install Terraform
+if command -v terraform &>/dev/null; then
+ echo "[OK] terraform already installed"
+else
+ echo "[..] Installing terraform..."
+ brew install hashicorp/tap/terraform
+ echo "[OK] terraform installed"
+fi
+
+# Write kubeconfig
+mkdir -p ~/.kube
+cat > ~/.kube/config-home << 'KUBECONFIG_EOF'
+${kubeconfigContent}
+KUBECONFIG_EOF
+echo "[OK] Kubeconfig written to ~/.kube/config-home"
+
+# Add KUBECONFIG to shell profile
+SHELL_RC=~/.zshrc
+[ ! -f ~/.zshrc ] && SHELL_RC=~/.bashrc
+if ! grep -q 'config-home' "\$SHELL_RC" 2>/dev/null; then
+ echo 'export KUBECONFIG=~/.kube/config-home' >> "\$SHELL_RC"
+ echo "[OK] Added KUBECONFIG to \$SHELL_RC"
+fi
+export KUBECONFIG=~/.kube/config-home
+
+echo ""
+echo "=== Setup complete! ==="
+echo ""
+echo "Run 'kubectl get namespaces' to test (opens browser for login)."
+echo "You may need to restart your shell or run: export KUBECONFIG=~/.kube/config-home"
+`;
+ }
+
+ return new Response(script, {
+ headers: {
+ 'Content-Type': 'text/plain; charset=utf-8'
+ }
+ });
+};
diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/troubleshooting/+page.svelte b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/troubleshooting/+page.svelte
new file mode 100644
index 00000000..17ac2e5a
--- /dev/null
+++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/troubleshooting/+page.svelte
@@ -0,0 +1,63 @@
+
+ Troubleshooting
+
+
+ "kubectl can't connect to the server"
+
+ Check your VPN: tailscale status — should show "connected"
+ Check KUBECONFIG: echo $KUBECONFIG — should be ~/.kube/config-home
+ Test connectivity: ping 10.0.20.100
+ If ping works but kubectl doesn't, re-run the setup script
+
+
+
+
+ "Forbidden" or "Permission denied"
+ You may not have access to that namespace. Your access is scoped to specific namespaces.
+ Try: kubectl get namespaces to see which namespaces you can access.
+ Need access to another namespace? Ask Viktor.
+
+
+
+ "Pod is CrashLoopBackOff"
+
+ Check pod logs: kubectl logs -n <namespace> <pod-name> --tail=50
+ Check previous crash: kubectl logs -n <namespace> <pod-name> --previous
+ Check events: kubectl describe pod -n <namespace> <pod-name>
+ Common causes: OOMKilled (need more memory), bad config, database connection failure
+
+
+
+
+ "PR CI failed"
+
+ Check the Woodpecker CI dashboard: ci.viktorbarzin.me
+ Read the build logs — the error is usually at the bottom
+ Fix the issue, commit, and push — CI will re-run
+
+
+
+
+ "I need a new secret / database password"
+ Secrets are managed by Viktor in an encrypted file. You cannot add them yourself.
+
+ Comment on your PR: "Need DB password for <service>"
+ Viktor adds the secret and pushes to your branch
+ Reference it as var.<service>_db_password in your Terraform
+
+
+
+
+
+
+
diff --git a/stacks/k8s-portal/modules/k8s-portal/files/static/robots.txt b/stacks/k8s-portal/modules/k8s-portal/files/static/robots.txt
new file mode 100644
index 00000000..b6dd6670
--- /dev/null
+++ b/stacks/k8s-portal/modules/k8s-portal/files/static/robots.txt
@@ -0,0 +1,3 @@
+# allow crawling everything by default
+User-agent: *
+Disallow:
diff --git a/stacks/k8s-portal/modules/k8s-portal/files/svelte.config.js b/stacks/k8s-portal/modules/k8s-portal/files/svelte.config.js
new file mode 100644
index 00000000..6bfb3c40
--- /dev/null
+++ b/stacks/k8s-portal/modules/k8s-portal/files/svelte.config.js
@@ -0,0 +1,10 @@
+// SvelteKit build configuration: adapter-node emits a standalone Node.js
+// server suitable for running inside a container.
+import adapter from '@sveltejs/adapter-node';
+
+/** @type {import('@sveltejs/kit').Config} */
+const config = {
+ kit: {
+ adapter: adapter()
+ }
+};
+
+export default config;
diff --git a/stacks/k8s-portal/modules/k8s-portal/files/tsconfig.json b/stacks/k8s-portal/modules/k8s-portal/files/tsconfig.json
new file mode 100644
index 00000000..2c2ed3c4
--- /dev/null
+++ b/stacks/k8s-portal/modules/k8s-portal/files/tsconfig.json
@@ -0,0 +1,20 @@
+{
+ "extends": "./.svelte-kit/tsconfig.json",
+ "compilerOptions": {
+ "rewriteRelativeImportExtensions": true,
+ "allowJs": true,
+ "checkJs": true,
+ "esModuleInterop": true,
+ "forceConsistentCasingInFileNames": true,
+ "resolveJsonModule": true,
+ "skipLibCheck": true,
+ "sourceMap": true,
+ "strict": true,
+ "moduleResolution": "bundler"
+ }
+ // Path aliases are handled by https://svelte.dev/docs/kit/configuration#alias
+ // except $lib which is handled by https://svelte.dev/docs/kit/configuration#files
+ //
+ // To make changes to top-level options such as include and exclude, we recommend extending
+ // the generated config; see https://svelte.dev/docs/kit/configuration#typescript
+}
diff --git a/stacks/k8s-portal/modules/k8s-portal/files/vite.config.ts b/stacks/k8s-portal/modules/k8s-portal/files/vite.config.ts
new file mode 100644
index 00000000..bbf8c7da
--- /dev/null
+++ b/stacks/k8s-portal/modules/k8s-portal/files/vite.config.ts
@@ -0,0 +1,6 @@
+// Vite configuration: the sveltekit() plugin wires Vite to the settings in
+// svelte.config.js; no further options are needed.
+import { sveltekit } from '@sveltejs/kit/vite';
+import { defineConfig } from 'vite';
+
+export default defineConfig({
+ plugins: [sveltekit()]
+});
diff --git a/stacks/k8s-portal/modules/k8s-portal/main.tf b/stacks/k8s-portal/modules/k8s-portal/main.tf
new file mode 100644
index 00000000..53c375d6
--- /dev/null
+++ b/stacks/k8s-portal/modules/k8s-portal/main.tf
@@ -0,0 +1,166 @@
+# --- Inputs ---
+# Name of the TLS secret replicated into this namespace for HTTPS ingresses.
+variable "tls_secret_name" {}
+# Tier label applied to the namespace and deployment (values come from tiers.tf).
+variable "tier" { type = string }
+# Kubernetes API CA certificate exposed to users through the portal's setup
+# flow; defaults to "" so the stack can be applied before the cert is supplied.
+variable "k8s_ca_cert" {
+ type = string
+ default = ""
+}
+
+# Dedicated namespace for the self-service portal.
+resource "kubernetes_namespace" "k8s_portal" {
+ metadata {
+ name = "k8s-portal"
+ labels = {
+ tier = var.tier
+ }
+ }
+}
+
+# Replicate the wildcard TLS secret into this namespace.
+module "tls_secret" {
+ source = "../../../../modules/kubernetes/setup_tls_secret"
+ namespace = kubernetes_namespace.k8s_portal.metadata[0].name
+ tls_secret_name = var.tls_secret_name
+}
+
+# ConfigMap holding the API-server CA cert, mounted into the portal container.
+resource "kubernetes_config_map" "k8s_portal_config" {
+ metadata {
+ name = "k8s-portal-config"
+ namespace = kubernetes_namespace.k8s_portal.metadata[0].name
+ }
+
+ data = {
+ "ca.crt" = var.k8s_ca_cert
+ }
+}
+
+# Portal web app (single replica). Recreate strategy avoids two pods holding
+# the same mounts during a rollout; brief downtime on deploys is accepted.
+resource "kubernetes_deployment" "k8s_portal" {
+ metadata {
+ name = "k8s-portal"
+ namespace = kubernetes_namespace.k8s_portal.metadata[0].name
+ labels = {
+ app = "k8s-portal"
+ tier = var.tier
+ }
+ }
+
+ spec {
+ replicas = 1
+ strategy {
+ type = "Recreate"
+ }
+ revision_history_limit = 3
+ selector {
+ match_labels = {
+ app = "k8s-portal"
+ }
+ }
+
+ template {
+ metadata {
+ labels = {
+ app = "k8s-portal"
+ }
+ }
+
+ spec {
+ container {
+ name = "portal"
+ # ":latest" placeholder; the actual tag is set by CI and drift is
+ # suppressed via the lifecycle ignore_changes block below.
+ image = "viktorbarzin/k8s-portal:latest"
+ port {
+ container_port = 3000
+ }
+
+ # API-server CA cert from this stack's ConfigMap (single file via sub_path).
+ volume_mount {
+ name = "config"
+ mount_path = "/config/ca.crt"
+ sub_path = "ca.crt"
+ read_only = true
+ }
+ # User-to-role mapping produced by the rbac stack (see volume below).
+ volume_mount {
+ name = "user-roles"
+ mount_path = "/config/users.json"
+ sub_path = "users.json"
+ read_only = true
+ }
+ # NOTE(review): no liveness/readiness probes -- the Service will route
+ # to the pod as soon as it starts; consider a TCP/HTTP probe on 3000.
+ resources {
+ requests = {
+ cpu = "10m"
+ memory = "128Mi"
+ }
+ limits = {
+ memory = "128Mi"
+ }
+ }
+ }
+
+ volume {
+ name = "config"
+ config_map {
+ name = kubernetes_config_map.k8s_portal_config.metadata[0].name
+ }
+ }
+ # NOTE(review): "k8s-user-roles" is created by the separate rbac stack;
+ # the pod will fail to start until that stack has been applied.
+ volume {
+ name = "user-roles"
+ config_map {
+ name = "k8s-user-roles"
+ }
+ }
+ dns_config {
+ option {
+ name = "ndots"
+ value = "2"
+ }
+ }
+ }
+ }
+ }
+ # Ignore fields mutated outside Terraform: dns_config (cluster webhook/defaults)
+ # and the image tag, which CI bumps on each build.
+ lifecycle {
+ ignore_changes = [
+ spec[0].template[0].spec[0].dns_config,
+ spec[0].template[0].spec[0].container[0].image, # CI updates image tag
+ ]
+ }
+}
+
+# ClusterIP service exposing the portal on port 80 -> container port 3000.
+resource "kubernetes_service" "k8s_portal" {
+ metadata {
+ name = "k8s-portal"
+ namespace = kubernetes_namespace.k8s_portal.metadata[0].name
+ }
+
+ spec {
+ selector = {
+ app = "k8s-portal"
+ }
+ port {
+ port = 80
+ target_port = 3000
+ }
+ }
+}
+
+# Main ingress: protected by Authentik forward-auth; gethomepage annotations
+# register the service on the homepage dashboard.
+module "ingress" {
+ source = "../../../../modules/kubernetes/ingress_factory"
+ namespace = kubernetes_namespace.k8s_portal.metadata[0].name
+ name = "k8s-portal"
+ tls_secret_name = var.tls_secret_name
+ protected = true # Require Authentik login
+ extra_annotations = {
+ "gethomepage.dev/enabled" = "true"
+ "gethomepage.dev/name" = "K8s Portal"
+ "gethomepage.dev/description" = "Kubernetes portal"
+ "gethomepage.dev/icon" = "kubernetes.png"
+ "gethomepage.dev/group" = "Core Platform"
+ "gethomepage.dev/pod-selector" = ""
+ }
+}
+
+# Unprotected ingress for the setup script and agent endpoint (needs to be curl-able without auth)
+# NOTE(review): /setup/script serves a script that embeds kubeconfig content
+# (see the handler in files/src); presumably that kubeconfig contains only
+# OIDC login config and no static credentials -- confirm before exposing
+# these paths without authentication.
+module "ingress_setup_script" {
+ source = "../../../../modules/kubernetes/ingress_factory"
+ namespace = kubernetes_namespace.k8s_portal.metadata[0].name
+ name = "k8s-portal-setup"
+ host = "k8s-portal"
+ service_name = "k8s-portal"
+ ingress_path = ["/setup/script", "/agent"]
+ tls_secret_name = var.tls_secret_name
+ protected = false
+}
diff --git a/stacks/k8s-portal/secrets b/stacks/k8s-portal/secrets
new file mode 120000
index 00000000..ca54a7cf
--- /dev/null
+++ b/stacks/k8s-portal/secrets
@@ -0,0 +1 @@
+../../secrets
\ No newline at end of file
diff --git a/stacks/k8s-portal/terragrunt.hcl b/stacks/k8s-portal/terragrunt.hcl
new file mode 100644
index 00000000..4f16dddf
--- /dev/null
+++ b/stacks/k8s-portal/terragrunt.hcl
@@ -0,0 +1,8 @@
+# Terragrunt wiring: inherit remote-state and provider configuration from the
+# repository root.
+include "root" {
+ path = find_in_parent_folders()
+}
+
+# Ordering-only dependency: the infra stack must be applied before this one,
+# but no outputs are read from it (skip_outputs avoids a state lookup).
+dependency "infra" {
+ config_path = "../infra"
+ skip_outputs = true
+}
diff --git a/stacks/k8s-portal/tiers.tf b/stacks/k8s-portal/tiers.tf
new file mode 100644
index 00000000..eb0f8083
--- /dev/null
+++ b/stacks/k8s-portal/tiers.tf
@@ -0,0 +1,10 @@
+# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa
+locals {
+ tiers = {
+ core = "0-core"
+ cluster = "1-cluster"
+ gpu = "2-gpu"
+ edge = "3-edge"
+ aux = "4-aux"
+ }
+}
diff --git a/stacks/metallb/main.tf b/stacks/metallb/main.tf
new file mode 100644
index 00000000..54d21f69
--- /dev/null
+++ b/stacks/metallb/main.tf
@@ -0,0 +1,4 @@
+# MetalLB supplies LoadBalancer IPs on bare metal; placed in the core tier.
+module "metallb" {
+ source = "./modules/metallb"
+ tier = local.tiers.core
+}
diff --git a/stacks/metallb/modules/metallb/main.tf b/stacks/metallb/modules/metallb/main.tf
new file mode 100644
index 00000000..1659f08e
--- /dev/null
+++ b/stacks/metallb/modules/metallb/main.tf
@@ -0,0 +1,40 @@
+# Creates namespace and everything needed
+# Do not use until https://github.com/colinwilson/terraform-kubernetes-metallb/issues/5 is solved
+# module "metallb" {
+# source = "colinwilson/metallb/kubernetes"
+# version = "0.1.7"
+# }
+variable "tier" { type = string }
+
+# Namespace is created explicitly (the module below does not create it).
+resource "kubernetes_namespace" "metallb" {
+ metadata {
+ name = "metallb-system"
+ labels = {
+ app = "metallb"
+ # "istio-injection" : "disabled"
+ # NOTE(review): tier label is commented out here, unlike other stacks --
+ # confirm whether that is intentional.
+ # tier = var.tier
+ }
+ }
+}
+
+# Fork of the upstream module (see the linked issue above for why).
+module "metallb" {
+ source = "ViktorBarzin/metallb/kubernetes"
+ version = "0.1.5"
+ depends_on = [kubernetes_namespace.metallb]
+}
+
+resource "kubernetes_config_map" "config" {
+ metadata {
+ name = "config"
+ namespace = kubernetes_namespace.metallb.metadata[0].name
+ }
+ data = {
+ config = < user if user.role == "admin" })
+
+ metadata {
+ name = "oidc-admin-${each.key}"
+ }
+
+ role_ref {
+ api_group = "rbac.authorization.k8s.io"
+ kind = "ClusterRole"
+ name = "cluster-admin"
+ }
+
+ subject {
+ kind = "User"
+ name = each.value.email
+ api_group = "rbac.authorization.k8s.io"
+ }
+}
+
+# --- Power-user role ---
+# Can manage workloads cluster-wide but cannot modify RBAC, nodes, or persistent volumes
+
+resource "kubernetes_cluster_role" "power_user" {
+  metadata {
+    name = "oidc-power-user"
+  }
+
+  # Core resources (read). NOTE(review): this includes read access to all
+  # Secrets cluster-wide -- confirm that is intended for power users.
+  rule {
+    api_groups = [""]
+    resources  = ["pods", "pods/log", "pods/exec", "services", "endpoints", "configmaps", "secrets", "persistentvolumeclaims", "events", "namespaces"]
+    verbs      = ["get", "list", "watch"]
+  }
+
+  # Core resources (write).
+  rule {
+    api_groups = [""]
+    resources  = ["pods", "services", "configmaps", "secrets", "persistentvolumeclaims"]
+    verbs      = ["create", "update", "patch", "delete"]
+  }
+
+  # kubectl exec / port-forward: the API server authorizes these through the
+  # "create" verb on the pod subresources, so the read-only verbs granted in
+  # the first rule were not sufficient for exec to actually work.
+  rule {
+    api_groups = [""]
+    resources  = ["pods/exec", "pods/portforward"]
+    verbs      = ["create"]
+  }
+
+  # Apps
+  rule {
+    api_groups = ["apps"]
+    resources  = ["deployments", "statefulsets", "daemonsets", "replicasets"]
+    verbs      = ["get", "list", "watch", "create", "update", "patch", "delete"]
+  }
+
+  # Batch
+  rule {
+    api_groups = ["batch"]
+    resources  = ["jobs", "cronjobs"]
+    verbs      = ["get", "list", "watch", "create", "update", "patch", "delete"]
+  }
+
+  # Networking
+  rule {
+    api_groups = ["networking.k8s.io"]
+    resources  = ["ingresses", "networkpolicies"]
+    verbs      = ["get", "list", "watch", "create", "update", "patch", "delete"]
+  }
+
+  # Autoscaling
+  rule {
+    api_groups = ["autoscaling"]
+    resources  = ["horizontalpodautoscalers"]
+    verbs      = ["get", "list", "watch", "create", "update", "patch", "delete"]
+  }
+
+  # Read-only on cluster-level resources
+  rule {
+    api_groups = [""]
+    resources  = ["nodes"]
+    verbs      = ["get", "list", "watch"]
+  }
+
+  rule {
+    api_groups = ["storage.k8s.io"]
+    resources  = ["storageclasses"]
+    verbs      = ["get", "list", "watch"]
+  }
+
+  rule {
+    api_groups = ["rbac.authorization.k8s.io"]
+    resources  = ["clusterroles", "clusterrolebindings", "roles", "rolebindings"]
+    verbs      = ["get", "list", "watch"]
+  }
+}
+
+# One ClusterRoleBinding per power-user; nonsensitive() is needed because
+# var.k8s_users comes from a sensitive source and for_each keys must not be
+# sensitive. Subjects are matched by the OIDC email claim.
+resource "kubernetes_cluster_role_binding" "power_users" {
+ for_each = nonsensitive({ for name, user in var.k8s_users : name => user if user.role == "power-user" })
+
+ metadata {
+ name = "oidc-power-user-${each.key}"
+ }
+
+ role_ref {
+ api_group = "rbac.authorization.k8s.io"
+ kind = "ClusterRole"
+ name = kubernetes_cluster_role.power_user.metadata[0].name
+ }
+
+ subject {
+ kind = "User"
+ name = each.value.email
+ api_group = "rbac.authorization.k8s.io"
+ }
+}
+
+# --- Namespace-owner role ---
+# Full admin within assigned namespaces + read-only cluster-wide
+
+locals {
+ # Flatten user->namespace pairs for iteration: one element per
+ # (user, namespace) combination, carrying the email and quota along.
+ namespace_owner_pairs = flatten([
+ for name, user in var.k8s_users : [
+ for ns in user.namespaces : {
+ user_key = name
+ namespace = ns
+ email = user.email
+ quota = user.quota
+ }
+ ] if user.role == "namespace-owner"
+ ])
+}
+
+# Per-namespace admin binding. RoleBindings are namespaced, so the
+# "namespace-owner-<user>" name can safely repeat across namespaces.
+resource "kubernetes_role_binding" "namespace_owner" {
+ for_each = nonsensitive({ for pair in local.namespace_owner_pairs : "${pair.user_key}-${pair.namespace}" => pair })
+
+ metadata {
+ name = "namespace-owner-${each.value.user_key}"
+ namespace = each.value.namespace
+ }
+
+ role_ref {
+ api_group = "rbac.authorization.k8s.io"
+ kind = "ClusterRole"
+ name = "admin" # Built-in ClusterRole with full namespace access
+ }
+
+ subject {
+ kind = "User"
+ name = each.value.email
+ api_group = "rbac.authorization.k8s.io"
+ }
+}
+
+# Read-only cluster-wide access for namespace owners
+# (lets them see cluster topology and other workloads without editing them).
+resource "kubernetes_cluster_role" "namespace_owner_readonly" {
+ metadata {
+ name = "oidc-namespace-owner-readonly"
+ }
+
+ rule {
+ api_groups = [""]
+ resources = ["namespaces", "nodes"]
+ verbs = ["get", "list", "watch"]
+ }
+
+ rule {
+ api_groups = [""]
+ resources = ["pods", "services", "configmaps", "events"]
+ verbs = ["get", "list", "watch"]
+ }
+
+ rule {
+ api_groups = ["apps"]
+ resources = ["deployments", "statefulsets", "daemonsets"]
+ verbs = ["get", "list", "watch"]
+ }
+}
+
+# Bind the read-only role to every namespace-owner (one binding per user).
+resource "kubernetes_cluster_role_binding" "namespace_owner_readonly" {
+ for_each = nonsensitive({ for name, user in var.k8s_users : name => user if user.role == "namespace-owner" })
+
+ metadata {
+ name = "oidc-ns-owner-readonly-${each.key}"
+ }
+
+ role_ref {
+ api_group = "rbac.authorization.k8s.io"
+ kind = "ClusterRole"
+ name = kubernetes_cluster_role.namespace_owner_readonly.metadata[0].name
+ }
+
+ subject {
+ kind = "User"
+ name = each.value.email
+ api_group = "rbac.authorization.k8s.io"
+ }
+}
+
+# Resource quotas per user namespace
+# Quota values come from the per-user "quota" object in var.k8s_users.
+resource "kubernetes_resource_quota" "user_namespace_quota" {
+ for_each = nonsensitive({ for pair in local.namespace_owner_pairs : "${pair.user_key}-${pair.namespace}" => pair })
+
+ metadata {
+ name = "user-quota"
+ namespace = each.value.namespace
+ }
+
+ spec {
+ hard = {
+ "requests.cpu" = each.value.quota.cpu_requests
+ "requests.memory" = each.value.quota.memory_requests
+ "limits.memory" = each.value.quota.memory_limits
+ "pods" = each.value.quota.pods
+ }
+ }
+
+ # Ordering only: create the owner binding before constraining the namespace.
+ depends_on = [kubernetes_role_binding.namespace_owner]
+}
+
+# ConfigMap with user-role mapping for the self-service portal
+# NOTE(review): namespace "k8s-portal" is created by the separate k8s-portal
+# stack -- this apply fails if that stack has not run yet.
+resource "kubernetes_config_map" "user_roles" {
+ metadata {
+ name = "k8s-user-roles"
+ namespace = "k8s-portal"
+ }
+
+ data = {
+ "users.json" = jsonencode({
+ for name, user in var.k8s_users : user.email => {
+ role = user.role
+ namespaces = user.namespaces
+ }
+ })
+ }
+}
+
+# TLS secret in each user namespace (so they can create HTTPS ingresses)
+module "user_namespace_tls" {
+ for_each = nonsensitive(toset(flatten([
+ for name, user in var.k8s_users : user.namespaces
+ if user.role == "namespace-owner"
+ ])))
+
+ source = "../../../../modules/kubernetes/setup_tls_secret"
+ namespace = each.value
+ tls_secret_name = var.tls_secret_name
+}
diff --git a/stacks/rbac/secrets b/stacks/rbac/secrets
new file mode 120000
index 00000000..ca54a7cf
--- /dev/null
+++ b/stacks/rbac/secrets
@@ -0,0 +1 @@
+../../secrets
\ No newline at end of file
diff --git a/stacks/rbac/terragrunt.hcl b/stacks/rbac/terragrunt.hcl
new file mode 100644
index 00000000..4f16dddf
--- /dev/null
+++ b/stacks/rbac/terragrunt.hcl
@@ -0,0 +1,8 @@
+# Terragrunt wiring: inherit remote-state and provider configuration from the
+# repository root.
+include "root" {
+ path = find_in_parent_folders()
+}
+
+# Ordering-only dependency: the infra stack must be applied before this one,
+# but no outputs are read from it (skip_outputs avoids a state lookup).
+dependency "infra" {
+ config_path = "../infra"
+ skip_outputs = true
+}
diff --git a/stacks/rbac/tiers.tf b/stacks/rbac/tiers.tf
new file mode 100644
index 00000000..eb0f8083
--- /dev/null
+++ b/stacks/rbac/tiers.tf
@@ -0,0 +1,10 @@
+# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa
+locals {
+ tiers = {
+ core = "0-core"
+ cluster = "1-cluster"
+ gpu = "2-gpu"
+ edge = "3-edge"
+ aux = "4-aux"
+ }
+}
diff --git a/stacks/redis/main.tf b/stacks/redis/main.tf
new file mode 100644
index 00000000..d6e7c8f9
--- /dev/null
+++ b/stacks/redis/main.tf
@@ -0,0 +1,9 @@
+# Inputs supplied by Terragrunt root config.
+variable "tls_secret_name" { type = string }
+variable "nfs_server" { type = string }
+
+# Redis (Sentinel HA + HAProxy front + NFS backups); cluster tier.
+module "redis" {
+ source = "./modules/redis"
+ tls_secret_name = var.tls_secret_name
+ nfs_server = var.nfs_server
+ tier = local.tiers.cluster
+}
diff --git a/stacks/redis/modules/redis/main.tf b/stacks/redis/modules/redis/main.tf
new file mode 100644
index 00000000..5f5c5966
--- /dev/null
+++ b/stacks/redis/modules/redis/main.tf
@@ -0,0 +1,310 @@
+# --- Inputs ---
+variable "tls_secret_name" {}
+variable "tier" { type = string }
+# NFS server hosting the backup export (see module "nfs_backup" below).
+variable "nfs_server" { type = string }
+
+# Dedicated namespace for all Redis components.
+resource "kubernetes_namespace" "redis" {
+ metadata {
+ name = "redis"
+ labels = {
+ tier = var.tier
+ }
+ }
+}
+
+# Replicate the wildcard TLS secret into this namespace.
+module "tls_secret" {
+ source = "../../../../modules/kubernetes/setup_tls_secret"
+ namespace = kubernetes_namespace.redis.metadata[0].name
+ tls_secret_name = var.tls_secret_name
+}
+
+# Redis with Sentinel HA via Bitnami Helm chart
+# Architecture: 1 master + 1 replica + 2 sentinels (one per node)
+# Sentinel automatically promotes a replica if master fails
+# HAProxy sits in front and routes only to the current master (see below)
+resource "helm_release" "redis" {
+ namespace = kubernetes_namespace.redis.metadata[0].name
+ create_namespace = false
+ name = "redis"
+ atomic = true
+ timeout = 600
+
+ # Chart pulled from a private OCI registry at 10.0.20.10:5000 -- presumably a
+ # local mirror of bitnamicharts; confirm it is kept in sync upstream.
+ repository = "oci://10.0.20.10:5000/bitnamicharts"
+ chart = "redis"
+ version = "25.3.2"
+
+ values = [yamlencode({
+ architecture = "replication"
+
+ # NOTE(review): AUTH disabled -- access control relies entirely on
+ # cluster-network isolation; confirm that is acceptable.
+ auth = {
+ enabled = false
+ }
+
+ sentinel = {
+ enabled = true
+ # NOTE(review): quorum of 2 with only 2 sentinels means that if the
+ # master's node dies (taking its sentinel with it), quorum cannot be
+ # reached and automatic failover will NOT happen. A 3-node/3-sentinel
+ # layout is the usual fix -- confirm this trade-off is intentional.
+ quorum = 2
+ masterSet = "mymaster"
+ # NOTE(review): "automateCluster" does not look like a documented value
+ # of the Bitnami chart -- verify the intended key name.
+ automateCluster = true
+
+ resources = {
+ requests = {
+ cpu = "50m"
+ memory = "64Mi"
+ }
+ limits = {
+ memory = "64Mi"
+ }
+ }
+ }
+
+ master = {
+ persistence = {
+ enabled = true
+ storageClass = "iscsi-truenas"
+ size = "2Gi"
+ }
+
+ resources = {
+ requests = {
+ cpu = "100m"
+ memory = "64Mi"
+ }
+ limits = {
+ memory = "64Mi"
+ }
+ }
+ }
+
+ # In sentinel mode the chart runs redis-node-* pods; replicaCount here
+ # must match the server list in the HAProxy config below.
+ replica = {
+ replicaCount = 2
+
+ persistence = {
+ enabled = true
+ storageClass = "iscsi-truenas"
+ size = "2Gi"
+ }
+
+ resources = {
+ requests = {
+ cpu = "50m"
+ memory = "64Mi"
+ }
+ limits = {
+ memory = "64Mi"
+ }
+ }
+ }
+
+ # Metrics for Prometheus
+ metrics = {
+ enabled = false
+ }
+
+ # Use the existing service name so clients don't need changes
+ # Sentinel-enabled Bitnami chart creates a headless service
+ # and a regular service pointing at the master
+ nameOverride = "redis"
+ })]
+}
+
+# HAProxy-based master-only proxy for simple redis:// clients.
+# Health-checks each Redis node via INFO replication and only routes
+# to the current master. On Sentinel failover, HAProxy detects the
+# new master within seconds via its health check interval.
+# Previously this was a K8s Service that routed to all nodes, causing
+# READONLY errors when clients hit a replica.
+#
+# NOTE(review): the server lines below must be kept in sync with
+# replica.replicaCount in the Helm values (currently 2 nodes). Also, with no
+# "resolvers" section HAProxy resolves these DNS names only at startup; if a
+# redis-node pod is rescheduled to a new IP, HAProxy keeps the stale address
+# until it restarts -- confirm this is acceptable or add a resolvers section.
+resource "kubernetes_config_map" "haproxy" {
+ metadata {
+ name = "redis-haproxy"
+ namespace = kubernetes_namespace.redis.metadata[0].name
+ }
+ data = {
+ "haproxy.cfg" = <<-EOT
+ global
+ maxconn 256
+
+ defaults
+ mode tcp
+ timeout connect 5s
+ timeout client 30s
+ timeout server 30s
+ timeout check 3s
+
+ frontend redis_front
+ bind *:6379
+ default_backend redis_master
+
+ frontend sentinel_front
+ bind *:26379
+ default_backend redis_sentinel
+
+ backend redis_master
+ option tcp-check
+ tcp-check connect
+ tcp-check send "PING\r\n"
+ tcp-check expect string +PONG
+ tcp-check send "INFO replication\r\n"
+ tcp-check expect string role:master
+ tcp-check send "QUIT\r\n"
+ tcp-check expect string +OK
+ server redis-node-0 redis-node-0.redis-headless.redis.svc.cluster.local:6379 check inter 3s fall 3 rise 2
+ server redis-node-1 redis-node-1.redis-headless.redis.svc.cluster.local:6379 check inter 3s fall 3 rise 2
+
+ backend redis_sentinel
+ balance roundrobin
+ server redis-node-0 redis-node-0.redis-headless.redis.svc.cluster.local:26379 check inter 5s
+ server redis-node-1 redis-node-1.redis-headless.redis.svc.cluster.local:26379 check inter 5s
+ EOT
+ }
+}
+
+# Two HAProxy replicas for availability of the proxy layer itself.
+resource "kubernetes_deployment" "haproxy" {
+ metadata {
+ name = "redis-haproxy"
+ namespace = kubernetes_namespace.redis.metadata[0].name
+ labels = {
+ app = "redis-haproxy"
+ }
+ }
+ spec {
+ replicas = 2
+ selector {
+ match_labels = {
+ app = "redis-haproxy"
+ }
+ }
+ template {
+ metadata {
+ labels = {
+ app = "redis-haproxy"
+ }
+ }
+ spec {
+ container {
+ name = "haproxy"
+ image = "docker.io/library/haproxy:3.1-alpine"
+ port {
+ container_port = 6379
+ name = "redis"
+ }
+ port {
+ container_port = 26379
+ name = "sentinel"
+ }
+ # haproxy reads /usr/local/etc/haproxy/haproxy.cfg by default.
+ volume_mount {
+ name = "config"
+ mount_path = "/usr/local/etc/haproxy"
+ read_only = true
+ }
+ resources {
+ requests = {
+ cpu = "10m"
+ memory = "16Mi"
+ }
+ limits = {
+ memory = "16Mi"
+ }
+ }
+ # NOTE(review): this probe only verifies HAProxy is listening, not that
+ # a master backend is reachable; consider a readiness probe too.
+ liveness_probe {
+ tcp_socket {
+ port = 6379
+ }
+ initial_delay_seconds = 5
+ period_seconds = 10
+ }
+ }
+ volume {
+ name = "config"
+ config_map {
+ name = kubernetes_config_map.haproxy.metadata[0].name
+ }
+ }
+ }
+ }
+ }
+
+ depends_on = [helm_release.redis]
+}
+
+# Stable "redis.redis" service name for clients; routes through HAProxy so
+# plain redis:// clients always land on the current master.
+resource "kubernetes_service" "redis" {
+ metadata {
+ name = "redis"
+ namespace = kubernetes_namespace.redis.metadata[0].name
+ }
+ spec {
+ selector = {
+ app = "redis-haproxy"
+ }
+ port {
+ name = "tcp-redis"
+ port = 6379
+ target_port = 6379
+ }
+ port {
+ name = "tcp-sentinel"
+ port = 26379
+ target_port = 26379
+ }
+ }
+
+ depends_on = [kubernetes_deployment.haproxy]
+}
+
+# PV/PVC pair backed by the NFS export that receives RDB backups.
+module "nfs_backup" {
+ source = "../../../../modules/kubernetes/nfs_volume"
+ name = "redis-backup"
+ namespace = kubernetes_namespace.redis.metadata[0].name
+ nfs_server = var.nfs_server
+ nfs_path = "/mnt/main/redis-backup"
+}
+
+# Hourly backup: stream an RDB snapshot from the current master to NFS.
+# redis-cli --rdb performs a SYNC against the server, which makes the server
+# generate and stream a fresh dump -- so no separate BGSAVE is needed (the
+# previous BGSAVE + fixed 5s sleep was redundant and raced against dump
+# completion on larger datasets).
+resource "kubernetes_cron_job_v1" "redis-backup" {
+  metadata {
+    name      = "redis-backup"
+    namespace = kubernetes_namespace.redis.metadata[0].name
+  }
+  spec {
+    concurrency_policy        = "Replace"
+    failed_jobs_history_limit = 3
+    schedule                  = "0 * * * *"
+    # NOTE(review): a 10s starting deadline is tight -- a briefly busy
+    # scheduler will skip the run entirely; confirm this is intended.
+    starting_deadline_seconds     = 10
+    successful_jobs_history_limit = 3
+    job_template {
+      metadata {}
+      spec {
+        backoff_limit              = 2
+        ttl_seconds_after_finished = 60
+        template {
+          metadata {}
+          spec {
+            container {
+              name  = "redis-backup"
+              image = "redis:7-alpine"
+              # "redis.redis" is the HAProxy-backed service that always routes
+              # to the current master (see kubernetes_service.redis above).
+              command = ["/bin/sh", "-c", <<-EOT
+                set -eux
+                # SYNC-based dump: the server produces a fresh RDB and streams it back
+                redis-cli -h redis.redis --rdb /backup/dump.rdb
+                echo "Backup complete: $(ls -lh /backup/dump.rdb)"
+              EOT
+              ]
+              volume_mount {
+                name       = "backup"
+                mount_path = "/backup"
+              }
+            }
+            volume {
+              name = "backup"
+              persistent_volume_claim {
+                claim_name = module.nfs_backup.claim_name
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
diff --git a/stacks/redis/secrets b/stacks/redis/secrets
new file mode 120000
index 00000000..ca54a7cf
--- /dev/null
+++ b/stacks/redis/secrets
@@ -0,0 +1 @@
+../../secrets
\ No newline at end of file
diff --git a/stacks/redis/terragrunt.hcl b/stacks/redis/terragrunt.hcl
new file mode 100644
index 00000000..4f16dddf
--- /dev/null
+++ b/stacks/redis/terragrunt.hcl
@@ -0,0 +1,8 @@
+# Terragrunt wiring: inherit remote-state and provider configuration from the
+# repository root.
+include "root" {
+ path = find_in_parent_folders()
+}
+
+# Ordering-only dependency: the infra stack must be applied before this one,
+# but no outputs are read from it (skip_outputs avoids a state lookup).
+dependency "infra" {
+ config_path = "../infra"
+ skip_outputs = true
+}
diff --git a/stacks/redis/tiers.tf b/stacks/redis/tiers.tf
new file mode 100644
index 00000000..eb0f8083
--- /dev/null
+++ b/stacks/redis/tiers.tf
@@ -0,0 +1,10 @@
+# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa
+locals {
+ tiers = {
+ core = "0-core"
+ cluster = "1-cluster"
+ gpu = "2-gpu"
+ edge = "3-edge"
+ aux = "4-aux"
+ }
+}
diff --git a/stacks/reverse-proxy/main.tf b/stacks/reverse-proxy/main.tf
new file mode 100644
index 00000000..2416792a
--- /dev/null
+++ b/stacks/reverse-proxy/main.tf
@@ -0,0 +1,18 @@
+variable "tls_secret_name" { type = string }
+
+# Homepage-widget credentials live in Vault KV v2 under secret/platform.
+data "vault_kv_secret_v2" "secrets" {
+ mount = "secret"
+ name = "platform"
+}
+
+locals {
+ # The "homepage_credentials" key holds a JSON object keyed by service.
+ homepage_credentials = jsondecode(data.vault_kv_secret_v2.secrets.data["homepage_credentials"])
+}
+
+module "reverse-proxy" {
+ source = "./modules/reverse_proxy"
+ tls_secret_name = var.tls_secret_name
+ truenas_homepage_token = local.homepage_credentials["reverse_proxy"]["truenas_token"]
+ pfsense_homepage_token = local.homepage_credentials["reverse_proxy"]["pfsense_token"]
+ # try() falls back to "" so the stack still applies before the HA token exists.
+ haos_homepage_token = try(local.homepage_credentials["home_assistant"]["token"], "")
+}
diff --git a/stacks/reverse-proxy/modules/reverse_proxy/factory/main.tf b/stacks/reverse-proxy/modules/reverse_proxy/factory/main.tf
new file mode 100644
index 00000000..1af42844
--- /dev/null
+++ b/stacks/reverse-proxy/modules/reverse_proxy/factory/main.tf
@@ -0,0 +1,163 @@
+# --- Factory inputs: one proxied external service per module instance ---
+# Service/ingress name; also becomes the "<name>.viktorbarzin.me" hostname.
+variable "name" {}
+variable "namespace" {
+ default = "reverse-proxy"
+}
+# DNS name of the out-of-cluster backend (ExternalName target).
+variable "external_name" {}
+variable "port" {
+ default = "80"
+}
+variable "tls_secret_name" {}
+# "HTTP" or "HTTPS"; HTTPS switches the serversscheme/serverstransport annotations.
+variable "backend_protocol" {
+ default = "HTTP"
+}
+# When true, the Authentik forward-auth middleware is added to the chain.
+variable "protected" {
+ type = bool
+ default = true
+}
+variable "ingress_path" {
+ type = list(string)
+ default = ["/"]
+}
+# NOTE(review): not referenced by any resource in this file -- possibly a
+# leftover from the previous nginx-ingress setup; confirm before removing.
+variable "max_body_size" {
+ type = string
+ default = "50m"
+}
+variable "extra_annotations" {
+ default = {}
+}
+# When set, enables the per-service Rybbit analytics rewrite middleware.
+variable "rybbit_site_id" {
+ default = null
+ type = string
+}
+# When set, a per-service CSP middleware replaces the shared csp-headers one.
+variable "custom_content_security_policy" {
+ default = null
+ type = string
+}
+variable "strip_auth_headers" {
+ type = bool
+ default = false
+}
+# Fully-qualified middleware names appended to the default chain.
+variable "extra_middlewares" {
+ type = list(string)
+ default = []
+}
+
+
+# ExternalName service: gives the out-of-cluster backend an in-cluster DNS
+# name so the ingress below can target it like a normal Service.
+resource "kubernetes_service" "proxied-service" {
+ metadata {
+ name = var.name
+ namespace = var.namespace
+ labels = {
+ "app" = var.name
+ }
+ }
+
+ spec {
+ type = "ExternalName"
+ external_name = var.external_name
+
+ port {
+ # Port name encodes the protocol so Traefik picks the right scheme.
+ name = var.backend_protocol == "HTTPS" ? "https-${var.name}" : "${var.name}-web"
+ port = var.port
+ protocol = "TCP"
+ target_port = var.port
+ }
+ }
+}
+
+# Ingress for the proxied service. The middleware chain is assembled in a
+# fixed order (rate-limit -> CSP -> crowdsec -> auth -> per-service extras);
+# compact() drops the nulls produced by disabled options.
+resource "kubernetes_ingress_v1" "proxied-ingress" {
+ metadata {
+ name = var.name
+ namespace = var.namespace
+ annotations = merge({
+ "traefik.ingress.kubernetes.io/router.middlewares" = join(",", compact(concat([
+ "traefik-rate-limit@kubernetescrd",
+ # Shared CSP headers unless a custom per-service policy is supplied.
+ var.custom_content_security_policy == null ? "traefik-csp-headers@kubernetescrd" : null,
+ "traefik-crowdsec@kubernetescrd",
+ var.protected ? "traefik-authentik-forward-auth@kubernetescrd" : null,
+ var.strip_auth_headers ? "traefik-strip-auth-headers@kubernetescrd" : null,
+ var.rybbit_site_id != null ? "traefik-strip-accept-encoding@kubernetescrd" : null,
+ var.rybbit_site_id != null ? "${var.namespace}-rybbit-analytics-${var.name}@kubernetescrd" : null,
+ var.custom_content_security_policy != null ? "${var.namespace}-custom-csp-${var.name}@kubernetescrd" : null,
+ ], var.extra_middlewares)))
+ "traefik.ingress.kubernetes.io/router.entrypoints" = "websecure"
+ # For HTTPS backends: talk TLS to the origin but skip cert verification
+ # (internal hosts with self-signed certs).
+ "traefik.ingress.kubernetes.io/service.serversscheme" = var.backend_protocol == "HTTPS" ? "https" : null
+ "traefik.ingress.kubernetes.io/service.serverstransport" = var.backend_protocol == "HTTPS" ? "traefik-insecure-skip-verify@kubernetescrd" : null
+ }, var.extra_annotations)
+ }
+
+ spec {
+ ingress_class_name = "traefik"
+ tls {
+ hosts = ["${var.name}.viktorbarzin.me"]
+ secret_name = var.tls_secret_name
+ }
+ rule {
+ host = "${var.name}.viktorbarzin.me"
+ http {
+ dynamic "path" {
+ for_each = var.ingress_path
+
+ content {
+ path = path.value
+ backend {
+ service {
+
+ name = var.name
+ port {
+ number = var.port
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+# Rybbit analytics middleware (rewrite-body plugin with content-type filtering) - created per service when rybbit_site_id is set
+# Rybbit analytics middleware (rewrite-body plugin with content-type filtering) - created per service when rybbit_site_id is set
+# NOTE(review): regex and replacement are both empty strings, so as written
+# this middleware rewrites nothing -- the injected HTML snippet appears to
+# have been lost (likely stripped in transit); restore the intended
+# regex/replacement values before relying on analytics.
+resource "kubernetes_manifest" "rybbit_analytics" {
+ count = var.rybbit_site_id != null ? 1 : 0
+
+ manifest = {
+ apiVersion = "traefik.io/v1alpha1"
+ kind = "Middleware"
+ metadata = {
+ name = "rybbit-analytics-${var.name}"
+ namespace = var.namespace
+ }
+ spec = {
+ plugin = {
+ rewrite-body = {
+ rewrites = [{
+ regex = ""
+ replacement = ""
+ }]
+ # Only rewrite HTML responses; other content types pass through.
+ monitoring = {
+ types = ["text/html"]
+ }
+ }
+ }
+ }
+ }
+}
+
+# Custom CSP headers middleware - created per service when custom_content_security_policy is set
+# Custom CSP headers middleware - created per service when custom_content_security_policy is set
+# (the ingress above then references this instead of the shared csp-headers).
+resource "kubernetes_manifest" "custom_csp" {
+ count = var.custom_content_security_policy != null ? 1 : 0
+
+ manifest = {
+ apiVersion = "traefik.io/v1alpha1"
+ kind = "Middleware"
+ metadata = {
+ name = "custom-csp-${var.name}"
+ namespace = var.namespace
+ }
+ spec = {
+ headers = {
+ contentSecurityPolicy = var.custom_content_security_policy
+ }
+ }
+ }
+}
diff --git a/stacks/reverse-proxy/modules/reverse_proxy/main.tf b/stacks/reverse-proxy/modules/reverse_proxy/main.tf
new file mode 100644
index 00000000..e6dcc34b
--- /dev/null
+++ b/stacks/reverse-proxy/modules/reverse_proxy/main.tf
@@ -0,0 +1,356 @@
+# Reverse proxy for things in my infra that live
+# outside of K8S but should still be exposed through the cluster ingress (Traefik)
+
+# TLS secret name referenced by every ingress this module creates.
+variable "tls_secret_name" { type = string }
+
+# Homepage (gethomepage.dev) widget credentials. Marked sensitive so the
+# tokens are redacted from plan/apply output — previously only the HAOS
+# token was typed and sensitive, which leaked the other two in plans.
+variable "truenas_homepage_token" {
+  type      = string
+  sensitive = true
+}
+variable "pfsense_homepage_token" {
+  type      = string
+  sensitive = true
+}
+variable "haos_homepage_token" {
+  type      = string
+  default   = ""
+  sensitive = true
+}
+
+# Namespace holding all reverse-proxy ingress objects.
+resource "kubernetes_namespace" "reverse-proxy" {
+  metadata {
+    name = "reverse-proxy"
+  }
+}
+
+# TLS secret for the proxied hosts. Referencing the namespace resource (rather
+# than a hard-coded string) creates an implicit dependency, so the explicit
+# depends_on is no longer needed and the namespace name has a single source of truth.
+module "tls_secret" {
+  source          = "../../../../modules/kubernetes/setup_tls_secret"
+  namespace       = kubernetes_namespace.reverse-proxy.metadata[0].name
+  tls_secret_name = var.tls_secret_name
+}
+
+# https://pfsense.viktorbarzin.me/
+module "pfsense" {
+  source           = "./factory"
+  name             = "pfsense"
+  external_name    = "pfsense.viktorbarzin.lan"
+  tls_secret_name  = var.tls_secret_name
+  port             = 443
+  backend_protocol = "HTTPS"
+  rybbit_site_id   = "b029580e5a7c"
+
+  # gethomepage.dev dashboard entry plus the pfSense status widget.
+  extra_annotations = {
+    "gethomepage.dev/enabled"         = "true"
+    "gethomepage.dev/description"     = "Cluster Firewall"
+    "gethomepage.dev/group"           = "Identity & Security"
+    "gethomepage.dev/icon"            = "pfsense.png"
+    "gethomepage.dev/name"            = "pFsense"
+    "gethomepage.dev/widget.type"     = "pfsense"
+    "gethomepage.dev/widget.version"  = "2"
+    "gethomepage.dev/widget.url"      = "https://10.0.20.1"
+    "gethomepage.dev/widget.username" = "admin"
+    "gethomepage.dev/widget.password" = var.pfsense_homepage_token
+    "gethomepage.dev/widget.fields"   = "[\"load\", \"memory\", \"temp\", \"disk\"]"
+    "gethomepage.dev/widget.wan"      = "vtnet0"
+  }
+  depends_on = [kubernetes_namespace.reverse-proxy]
+}
+
+# https://nas.viktorbarzin.me/
+module "nas" {
+ source = "./factory"
+ name = "nas"
+ external_name = "nas.viktorbarzin.lan"
+ port = 5001
+ tls_secret_name = var.tls_secret_name
+ backend_protocol = "HTTPS"
+ max_body_size = "0m"
+ depends_on = [kubernetes_namespace.reverse-proxy]
+ rybbit_site_id = "1e11f8449f7d"
+ extra_annotations = {
+ "gethomepage.dev/enabled" = "true"
+ "gethomepage.dev/name" = "Synology NAS"
+ "gethomepage.dev/description" = "Network storage"
+ "gethomepage.dev/icon" = "synology.png"
+ "gethomepage.dev/group" = "Infrastructure"
+ "gethomepage.dev/pod-selector" = ""
+ }
+}
+
+# https://files.viktorbarzin.me/
+module "nas-files" {
+ source = "./factory"
+ name = "files"
+ external_name = "nas.viktorbarzin.lan"
+ port = 5001
+ tls_secret_name = var.tls_secret_name
+ backend_protocol = "HTTPS"
+ protected = false # allow anyone to download files
+ ingress_path = ["/sharing", "/scripts", "/webman", "/wfmlogindialog.js", "/fsdownload"]
+ max_body_size = "0m"
+ depends_on = [kubernetes_namespace.reverse-proxy]
+ extra_annotations = { "gethomepage.dev/enabled" = "false" }
+}
+
+# https://idrac.viktorbarzin.me/
+module "idrac" {
+ source = "./factory"
+ name = "idrac"
+ external_name = "idrac.viktorbarzin.lan"
+ port = 443
+ tls_secret_name = var.tls_secret_name
+ backend_protocol = "HTTPS"
+ strip_auth_headers = true
+ extra_annotations = {
+ "gethomepage.dev/enabled" = "true"
+ "gethomepage.dev/name" = "iDRAC"
+ "gethomepage.dev/description" = "Server management"
+ "gethomepage.dev/icon" = "dell.png"
+ "gethomepage.dev/group" = "Infrastructure"
+ "gethomepage.dev/pod-selector" = ""
+ }
+ depends_on = [kubernetes_namespace.reverse-proxy]
+}
+
+# Can either listen on https or http; can't do both :/
+# TODO: Not working yet
+module "tp-link-gateway" {
+ source = "./factory"
+ name = "gw"
+ external_name = "gw.viktorbarzin.lan"
+ port = 443
+ tls_secret_name = var.tls_secret_name
+ backend_protocol = "HTTPS"
+ depends_on = [kubernetes_namespace.reverse-proxy]
+ protected = true
+ strip_auth_headers = true
+ extra_annotations = { "gethomepage.dev/enabled" = "false" }
+}
+
+# https://truenas.viktorbarzin.me/
+module "truenas" {
+  source          = "./factory"
+  name            = "truenas"
+  external_name   = "truenas.viktorbarzin.lan"
+  port            = 80
+  tls_secret_name = var.tls_secret_name
+  max_body_size   = "0m"
+  rybbit_site_id  = "b66fbd3cb58a"
+
+  # gethomepage.dev dashboard entry plus the TrueNAS status widget.
+  extra_annotations = {
+    "gethomepage.dev/enabled"     = "true"
+    "gethomepage.dev/description" = "TrueNAS"
+    "gethomepage.dev/group"       = "Infrastructure"
+    "gethomepage.dev/icon"        = "truenas.png"
+    "gethomepage.dev/name"        = "TrueNAS"
+    "gethomepage.dev/widget.type" = "truenas"
+    "gethomepage.dev/widget.url"  = "https://truenas.viktorbarzin.lan"
+    "gethomepage.dev/widget.key"  = var.truenas_homepage_token
+    # "gethomepage.dev/widget.enablePools" = "true"
+    # "gethomepage.dev/pod-selector"       = ""
+  }
+  depends_on = [kubernetes_namespace.reverse-proxy]
+}
+
+# https://r730.viktorbarzin.me/
+module "r730" {
+ source = "./factory"
+ name = "r730"
+ external_name = "r730.viktorbarzin.lan"
+ port = 443
+ tls_secret_name = var.tls_secret_name
+ backend_protocol = "HTTPS"
+ depends_on = [kubernetes_namespace.reverse-proxy]
+ extra_annotations = {
+ "gethomepage.dev/enabled" = "true"
+ "gethomepage.dev/name" = "R730"
+ "gethomepage.dev/description" = "Dell PowerEdge server"
+ "gethomepage.dev/icon" = "dell.png"
+ "gethomepage.dev/group" = "Infrastructure"
+ "gethomepage.dev/pod-selector" = ""
+ }
+}
+
+# https://proxmox.viktorbarzin.me/
+module "proxmox" {
+ source = "./factory"
+ name = "proxmox"
+ external_name = "proxmox.viktorbarzin.lan"
+ port = 8006
+ tls_secret_name = var.tls_secret_name
+ backend_protocol = "HTTPS"
+ max_body_size = "0" # unlimited
+ depends_on = [kubernetes_namespace.reverse-proxy]
+ rybbit_site_id = "190a7ad3e1c7"
+ extra_annotations = {
+ "gethomepage.dev/enabled" = "true"
+ "gethomepage.dev/name" = "Proxmox"
+ "gethomepage.dev/description" = "Hypervisor"
+ "gethomepage.dev/icon" = "proxmox.png"
+ "gethomepage.dev/group" = "Infrastructure"
+ "gethomepage.dev/pod-selector" = ""
+ }
+}
+
+# https://registry.viktorbarzin.me/
+module "docker-registry-ui" {
+  source          = "./factory"
+  name            = "registry"
+  external_name   = "docker-registry.viktorbarzin.lan"
+  port            = 8080
+  tls_secret_name = var.tls_secret_name
+  depends_on      = [kubernetes_namespace.reverse-proxy]
+  extra_annotations = {
+    # Override middleware chain to remove rate-limit; the UI fires many API calls to list repos/tags.
+    # NOTE(review): this replaces the factory's default chain entirely — if a new
+    # default middleware is added there, it must be mirrored here too.
+    "traefik.ingress.kubernetes.io/router.middlewares" = "traefik-csp-headers@kubernetescrd,traefik-crowdsec@kubernetescrd,traefik-authentik-forward-auth@kubernetescrd"
+    "gethomepage.dev/enabled"                          = "true"
+    "gethomepage.dev/name"                             = "Docker Registry"
+    "gethomepage.dev/description"                      = "Container registry"
+    "gethomepage.dev/icon"                             = "docker.png"
+    "gethomepage.dev/group"                            = "Infrastructure"
+    # Empty pod-selector: service runs outside the cluster, no pods to match.
+    "gethomepage.dev/pod-selector"                     = ""
+  }
+}
+
+# https://valchedrym.viktorbarzin.me/
+module "valchedrym" {
+ source = "./factory"
+ name = "valchedrym"
+ external_name = "valchedrym.viktorbarzin.lan"
+ tls_secret_name = var.tls_secret_name
+ port = 80
+ backend_protocol = "HTTP"
+ depends_on = [kubernetes_namespace.reverse-proxy]
+ extra_annotations = { "gethomepage.dev/enabled" = "false" }
+}
+
+# https://ip150.viktorbarzin.me/
+# Server has funky behaviour based on headers; works on some browsers but not others...
+# module "valchedrym-ip150" {
+# source = "./factory"
+# name = "ip150"
+# # external_name = "valchedrym.ddns.net"
+# external_name = "192.168.0.10"
+# port = 80
+# backend_protocol = "HTTP"
+# use_proxy_protocol = false
+# tls_secret_name = var.tls_secret_name
+# protected = false
+# depends_on = [kubernetes_namespace.reverse-proxy]
+# }
+
+# https://mladost3.viktorbarzin.me/
+module "mladost3" {
+ source = "./factory"
+ name = "mladost3"
+ external_name = "mladost3.ddns.net"
+ port = 8080
+ tls_secret_name = var.tls_secret_name
+ depends_on = [kubernetes_namespace.reverse-proxy]
+ extra_annotations = { "gethomepage.dev/enabled" = "false" }
+}
+
+# # https://server-switch.viktorbarzin.me/
+# module "server-switch" {
+# source = "./factory"
+# name = "server-switch"
+# external_name = "server-switch.viktorbarzin.lan"
+# port = 80
+# tls_secret_name = var.tls_secret_name
+# depends_on = [kubernetes_namespace.reverse-proxy]
+# }
+
+# https://ha-sofia.viktorbarzin.me/
+module "ha-sofia" {
+ source = "./factory"
+ name = "ha-sofia"
+ external_name = "ha-sofia.viktorbarzin.lan"
+ port = 8123
+ tls_secret_name = var.tls_secret_name
+ depends_on = [kubernetes_namespace.reverse-proxy]
+ protected = false
+ rybbit_site_id = "590fc392690a"
+ extra_annotations = {
+ "gethomepage.dev/enabled" = "true"
+ "gethomepage.dev/name" = "Home Assistant Sofia"
+ "gethomepage.dev/description" = "Smart home hub"
+ "gethomepage.dev/icon" = "home-assistant.png"
+ "gethomepage.dev/group" = "Smart Home"
+ "gethomepage.dev/pod-selector" = ""
+ }
+}
+
+# https://ha-london.viktorbarzin.me/
+module "ha-london" {
+ source = "./factory"
+ name = "ha-london"
+ external_name = "ha-london.viktorbarzin.lan"
+ port = 8123
+ tls_secret_name = var.tls_secret_name
+ depends_on = [kubernetes_namespace.reverse-proxy]
+ protected = false
+ extra_annotations = {
+ "gethomepage.dev/enabled" = "true"
+ "gethomepage.dev/name" = "Home Assistant London"
+ "gethomepage.dev/description" = "Smart home hub"
+ "gethomepage.dev/icon" = "home-assistant.png"
+ "gethomepage.dev/group" = "Smart Home"
+ "gethomepage.dev/pod-selector" = ""
+ }
+}
+
+# https://london.viktorbarzin.me/
+module "london" {
+  source           = "./factory"
+  name             = "london"
+  external_name    = "openwrt-london.viktorbarzin.lan"
+  port             = 443
+  tls_secret_name  = var.tls_secret_name
+  backend_protocol = "HTTPS"
+  protected        = true
+  depends_on       = [kubernetes_namespace.reverse-proxy]
+
+  # gethomepage.dev widget config (entry currently disabled).
+  extra_annotations = {
+    "gethomepage.dev/enabled"         = "false"
+    "gethomepage.dev/description"     = "OpenWRT London"
+    # gethomepage.dev/group: Media
+    "gethomepage.dev/icon"            = "openwrt.png"
+    "gethomepage.dev/name"            = "OpenWRT London"
+    "gethomepage.dev/widget.type"     = "openwrt"
+    "gethomepage.dev/widget.url"      = "https://100.64.0.14"
+    # "gethomepage.dev/widget.token" = var.homepage_token
+    "gethomepage.dev/widget.username" = "homepage"
+    "gethomepage.dev/widget.password" = "" # add later as Flint2's openwrt is a little odd
+    "gethomepage.dev/pod-selector"    = ""
+  }
+}
+module "pi-lights" {
+ source = "./factory"
+ name = "pi"
+ external_name = "ha-london.viktorbarzin.lan"
+ port = 5000
+ tls_secret_name = var.tls_secret_name
+ protected = true
+ depends_on = [kubernetes_namespace.reverse-proxy]
+ extra_annotations = { "gethomepage.dev/enabled" = "false" }
+}
+
+# module "ups" { # .NET app doesn't work well behind host
+# source = "./factory"
+# name = "ups"
+# external_name = "ups.viktorbarzin.lan"
+# backend_protocol = "HTTPS"
+# port = 443
+# tls_secret_name = var.tls_secret_name
+# # protected = true
+# protected = false
+# depends_on = [kubernetes_namespace.reverse-proxy]
+# extra_annotations = {
+# "nginx.ingress.kubernetes.io/upstream-vhost" : "",
+# # "nginx.ingress.kubernetes.io/proxy-set-header" : "Host: <>",
+# }
+# }
+
+module "mbp14" {
+ source = "./factory"
+ name = "mbp14"
+ external_name = "mbp14.viktorbarzin.lan"
+ port = 4020
+ tls_secret_name = var.tls_secret_name
+ protected = true
+ depends_on = [kubernetes_namespace.reverse-proxy]
+ extra_annotations = { "gethomepage.dev/enabled" = "false" }
+}
diff --git a/stacks/reverse-proxy/secrets b/stacks/reverse-proxy/secrets
new file mode 120000
index 00000000..ca54a7cf
--- /dev/null
+++ b/stacks/reverse-proxy/secrets
@@ -0,0 +1 @@
+../../secrets
\ No newline at end of file
diff --git a/stacks/reverse-proxy/terragrunt.hcl b/stacks/reverse-proxy/terragrunt.hcl
new file mode 100644
index 00000000..4f16dddf
--- /dev/null
+++ b/stacks/reverse-proxy/terragrunt.hcl
@@ -0,0 +1,8 @@
+# Pull in the shared root terragrunt config (remote state, provider generation).
+include "root" {
+  path = find_in_parent_folders()
+}
+
+# Ordering-only dependency on the infra stack: skip_outputs means no outputs
+# are read, it just sequences this stack after infra in run-all applies.
+dependency "infra" {
+  config_path  = "../infra"
+  skip_outputs = true
+}
diff --git a/stacks/reverse-proxy/tiers.tf b/stacks/reverse-proxy/tiers.tf
new file mode 100644
index 00000000..eb0f8083
--- /dev/null
+++ b/stacks/reverse-proxy/tiers.tf
@@ -0,0 +1,10 @@
+# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa
+locals {
+ tiers = {
+ core = "0-core"
+ cluster = "1-cluster"
+ gpu = "2-gpu"
+ edge = "3-edge"
+ aux = "4-aux"
+ }
+}
diff --git a/stacks/sealed-secrets/main.tf b/stacks/sealed-secrets/main.tf
new file mode 100644
index 00000000..fad73b0e
--- /dev/null
+++ b/stacks/sealed-secrets/main.tf
@@ -0,0 +1,4 @@
+# Thin entry point: all sealed-secrets resources live in the local module below.
+module "sealed-secrets" {
+  source = "./modules/sealed-secrets"
+  tier   = local.tiers.cluster # tier labels defined in tiers.tf
+}
diff --git a/stacks/sealed-secrets/modules/sealed-secrets/main.tf b/stacks/sealed-secrets/modules/sealed-secrets/main.tf
new file mode 100644
index 00000000..2175e5d8
--- /dev/null
+++ b/stacks/sealed-secrets/modules/sealed-secrets/main.tf
@@ -0,0 +1,45 @@
+# Tier label applied to the namespace (values defined in the stack's tiers.tf).
+variable "tier" { type = string }
+
+# -----------------------------------------------------------------------------
+# Namespace
+# -----------------------------------------------------------------------------
+resource "kubernetes_namespace" "sealed_secrets" {
+  metadata {
+    name = "sealed-secrets"
+    labels = {
+      tier = var.tier
+    }
+  }
+}
+
+# -----------------------------------------------------------------------------
+# Sealed Secrets — encrypts secrets for safe git storage
+# https://github.com/bitnami-labs/sealed-secrets
+# -----------------------------------------------------------------------------
+resource "helm_release" "sealed_secrets" {
+  namespace        = kubernetes_namespace.sealed_secrets.metadata[0].name
+  create_namespace = false # namespace is managed by the resource above
+  name             = "sealed-secrets"
+  atomic           = true # roll back automatically if install/upgrade fails
+  timeout          = 300
+
+  repository = "https://bitnami-labs.github.io/sealed-secrets"
+  chart      = "sealed-secrets"
+  version    = "2.18.3" # pinned; bump deliberately and review CRD changes
+
+  values = [yamlencode({
+    # Install the SealedSecret CRDs alongside the controller.
+    crds = {
+      create = true
+    }
+
+    # Equal memory request/limit; no CPU limit is set — presumably to avoid
+    # CPU throttling of the controller (TODO confirm this was intentional).
+    resources = {
+      requests = {
+        cpu    = "50m"
+        memory = "192Mi"
+      }
+      limits = {
+        memory = "192Mi"
+      }
+    }
+  })]
+}
diff --git a/stacks/sealed-secrets/secrets b/stacks/sealed-secrets/secrets
new file mode 120000
index 00000000..ca54a7cf
--- /dev/null
+++ b/stacks/sealed-secrets/secrets
@@ -0,0 +1 @@
+../../secrets
\ No newline at end of file
diff --git a/stacks/sealed-secrets/terragrunt.hcl b/stacks/sealed-secrets/terragrunt.hcl
new file mode 100644
index 00000000..4f16dddf
--- /dev/null
+++ b/stacks/sealed-secrets/terragrunt.hcl
@@ -0,0 +1,8 @@
+# Pull in the shared root terragrunt config (remote state, provider generation).
+include "root" {
+  path = find_in_parent_folders()
+}
+
+# Ordering-only dependency on the infra stack: skip_outputs means no outputs
+# are read, it just sequences this stack after infra in run-all applies.
+dependency "infra" {
+  config_path  = "../infra"
+  skip_outputs = true
+}
diff --git a/stacks/sealed-secrets/tiers.tf b/stacks/sealed-secrets/tiers.tf
new file mode 100644
index 00000000..eb0f8083
--- /dev/null
+++ b/stacks/sealed-secrets/tiers.tf
@@ -0,0 +1,10 @@
+# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa
+locals {
+ tiers = {
+ core = "0-core"
+ cluster = "1-cluster"
+ gpu = "2-gpu"
+ edge = "3-edge"
+ aux = "4-aux"
+ }
+}
diff --git a/stacks/technitium/main.tf b/stacks/technitium/main.tf
new file mode 100644
index 00000000..751d1159
--- /dev/null
+++ b/stacks/technitium/main.tf
@@ -0,0 +1,24 @@
+# Inputs supplied by the root terragrunt configuration.
+variable "tls_secret_name" { type = string }
+variable "nfs_server" { type = string }
+variable "mysql_host" { type = string }
+
+# Shared platform secrets stored in Vault under secret/platform (KV v2).
+data "vault_kv_secret_v2" "secrets" {
+  mount = "secret"
+  name  = "platform"
+}
+
+locals {
+  # JSON map of per-service homepage widget credentials, keyed by service name.
+  homepage_credentials = jsondecode(data.vault_kv_secret_v2.secrets.data["homepage_credentials"])
+}
+
+module "technitium" {
+  source          = "./modules/technitium"
+  tls_secret_name = var.tls_secret_name
+  nfs_server      = var.nfs_server
+  mysql_host      = var.mysql_host
+  # Token for the gethomepage.dev widget integration.
+  homepage_token         = local.homepage_credentials["technitium"]["token"]
+  technitium_db_password = data.vault_kv_secret_v2.secrets.data["technitium_db_password"]
+  technitium_username    = data.vault_kv_secret_v2.secrets.data["technitium_username"]
+  technitium_password    = data.vault_kv_secret_v2.secrets.data["technitium_password"]
+  tier                   = local.tiers.core # DNS is core-tier infrastructure
+}
diff --git a/stacks/technitium/modules/technitium/dashboards/technitium-dns.json b/stacks/technitium/modules/technitium/dashboards/technitium-dns.json
new file mode 100644
index 00000000..b0b17c37
--- /dev/null
+++ b/stacks/technitium/modules/technitium/dashboards/technitium-dns.json
@@ -0,0 +1,488 @@
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": { "type": "datasource", "uid": "grafana" },
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "description": "Technitium DNS query logs from MySQL",
+ "editable": true,
+ "fiscalYearStartMonth": 0,
+ "graphTooltip": 1,
+ "id": null,
+ "links": [],
+ "panels": [
+ {
+ "title": "Total Queries",
+ "type": "stat",
+ "datasource": { "type": "mysql", "uid": "technitium-mysql" },
+ "gridPos": { "h": 4, "w": 4, "x": 0, "y": 0 },
+ "fieldConfig": {
+ "defaults": {
+ "color": { "mode": "thresholds" },
+ "thresholds": {
+ "steps": [
+ { "color": "green", "value": null }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "textMode": "auto",
+ "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
+ },
+ "targets": [
+ {
+ "rawSql": "SELECT COUNT(*) as total_queries FROM dns_logs WHERE $__timeFilter(timestamp)",
+ "format": "table",
+ "refId": "A"
+ }
+ ]
+ },
+ {
+ "title": "Cached %",
+ "type": "stat",
+ "datasource": { "type": "mysql", "uid": "technitium-mysql" },
+ "gridPos": { "h": 4, "w": 4, "x": 4, "y": 0 },
+ "fieldConfig": {
+ "defaults": {
+ "color": { "mode": "thresholds" },
+ "unit": "percentunit",
+ "thresholds": {
+ "steps": [
+ { "color": "red", "value": null },
+ { "color": "yellow", "value": 0.3 },
+ { "color": "green", "value": 0.5 }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "textMode": "auto",
+ "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
+ },
+ "targets": [
+ {
+ "rawSql": "SELECT SUM(CASE WHEN response_type = 3 THEN 1 ELSE 0 END) / COUNT(*) as cached_pct FROM dns_logs WHERE $__timeFilter(timestamp)",
+ "format": "table",
+ "refId": "A"
+ }
+ ]
+ },
+ {
+ "title": "Blocked %",
+ "type": "stat",
+ "datasource": { "type": "mysql", "uid": "technitium-mysql" },
+ "gridPos": { "h": 4, "w": 4, "x": 8, "y": 0 },
+ "fieldConfig": {
+ "defaults": {
+ "color": { "mode": "thresholds" },
+ "unit": "percentunit",
+ "thresholds": {
+ "steps": [
+ { "color": "green", "value": null },
+ { "color": "yellow", "value": 0.1 },
+ { "color": "red", "value": 0.3 }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "textMode": "auto",
+ "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
+ },
+ "targets": [
+ {
+ "rawSql": "SELECT SUM(CASE WHEN response_type = 4 THEN 1 ELSE 0 END) / COUNT(*) as blocked_pct FROM dns_logs WHERE $__timeFilter(timestamp)",
+ "format": "table",
+ "refId": "A"
+ }
+ ]
+ },
+ {
+ "title": "NxDomain %",
+ "type": "stat",
+ "datasource": { "type": "mysql", "uid": "technitium-mysql" },
+ "gridPos": { "h": 4, "w": 4, "x": 12, "y": 0 },
+ "fieldConfig": {
+ "defaults": {
+ "color": { "mode": "thresholds" },
+ "unit": "percentunit",
+ "thresholds": {
+ "steps": [
+ { "color": "green", "value": null },
+ { "color": "yellow", "value": 0.2 },
+ { "color": "red", "value": 0.5 }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "textMode": "auto",
+ "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
+ },
+ "targets": [
+ {
+ "rawSql": "SELECT SUM(CASE WHEN rcode = 3 THEN 1 ELSE 0 END) / COUNT(*) as nxdomain_pct FROM dns_logs WHERE $__timeFilter(timestamp)",
+ "format": "table",
+ "refId": "A"
+ }
+ ]
+ },
+ {
+ "title": "Avg Response Time",
+ "type": "stat",
+ "datasource": { "type": "mysql", "uid": "technitium-mysql" },
+ "gridPos": { "h": 4, "w": 4, "x": 16, "y": 0 },
+ "fieldConfig": {
+ "defaults": {
+ "color": { "mode": "thresholds" },
+ "unit": "ms",
+ "thresholds": {
+ "steps": [
+ { "color": "green", "value": null },
+ { "color": "yellow", "value": 50 },
+ { "color": "red", "value": 200 }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "textMode": "auto",
+ "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
+ },
+ "targets": [
+ {
+ "rawSql": "SELECT AVG(response_rtt) as avg_rtt_ms FROM dns_logs WHERE $__timeFilter(timestamp) AND response_rtt IS NOT NULL",
+ "format": "table",
+ "refId": "A"
+ }
+ ]
+ },
+ {
+ "title": "Queries by Protocol",
+ "type": "stat",
+ "datasource": { "type": "mysql", "uid": "technitium-mysql" },
+ "gridPos": { "h": 4, "w": 4, "x": 20, "y": 0 },
+ "fieldConfig": {
+ "defaults": {
+ "color": { "mode": "palette-classic" }
+ },
+ "overrides": []
+ },
+ "options": {
+ "colorMode": "background",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "textMode": "auto",
+ "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": true }
+ },
+ "targets": [
+ {
+ "rawSql": "SELECT SUM(CASE WHEN protocol = 0 THEN 1 ELSE 0 END) as UDP, SUM(CASE WHEN protocol = 1 THEN 1 ELSE 0 END) as TCP, SUM(CASE WHEN protocol = 3 THEN 1 ELSE 0 END) as DoH, SUM(CASE WHEN protocol = 4 THEN 1 ELSE 0 END) as DoT FROM dns_logs WHERE $__timeFilter(timestamp)",
+ "format": "table",
+ "refId": "A"
+ }
+ ]
+ },
+ {
+ "title": "Queries Over Time",
+ "type": "timeseries",
+ "datasource": { "type": "mysql", "uid": "technitium-mysql" },
+ "gridPos": { "h": 8, "w": 24, "x": 0, "y": 4 },
+ "fieldConfig": {
+ "defaults": {
+ "color": { "mode": "palette-classic" },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "bars",
+ "fillOpacity": 50,
+ "gradientMode": "none",
+ "hideFrom": { "legend": false, "tooltip": false, "viz": false },
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": { "type": "linear" },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": { "group": "A", "mode": "normal" }
+ }
+ },
+ "overrides": []
+ },
+ "options": {
+ "legend": { "calcs": ["sum"], "displayMode": "list", "placement": "bottom" },
+ "tooltip": { "mode": "multi", "sort": "desc" }
+ },
+ "targets": [
+ {
+ "rawSql": "SELECT $__timeGroup(timestamp, $__interval) as time, SUM(CASE WHEN response_type = 1 THEN 1 ELSE 0 END) as Authoritative, SUM(CASE WHEN response_type = 2 THEN 1 ELSE 0 END) as Recursive, SUM(CASE WHEN response_type = 3 THEN 1 ELSE 0 END) as Cached, SUM(CASE WHEN response_type = 4 THEN 1 ELSE 0 END) as Blocked, SUM(CASE WHEN response_type = 5 THEN 1 ELSE 0 END) as Dropped FROM dns_logs WHERE $__timeFilter(timestamp) GROUP BY time ORDER BY time",
+ "format": "time_series",
+ "refId": "A"
+ }
+ ]
+ },
+ {
+ "title": "Response Codes",
+ "type": "piechart",
+ "datasource": { "type": "mysql", "uid": "technitium-mysql" },
+ "gridPos": { "h": 8, "w": 8, "x": 0, "y": 12 },
+ "fieldConfig": {
+ "defaults": {
+ "color": { "mode": "palette-classic" }
+ },
+ "overrides": [
+ { "matcher": { "id": "byName", "options": "NOERROR" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] },
+ { "matcher": { "id": "byName", "options": "NXDOMAIN" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] },
+ { "matcher": { "id": "byName", "options": "SERVFAIL" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] },
+ { "matcher": { "id": "byName", "options": "REFUSED" }, "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] }
+ ]
+ },
+ "options": {
+ "legend": { "displayMode": "table", "placement": "right", "values": ["value", "percent"] },
+ "pieType": "donut",
+ "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": true },
+ "tooltip": { "mode": "single" }
+ },
+ "targets": [
+ {
+ "rawSql": "SELECT SUM(CASE WHEN rcode = 0 THEN 1 ELSE 0 END) as NOERROR, SUM(CASE WHEN rcode = 2 THEN 1 ELSE 0 END) as SERVFAIL, SUM(CASE WHEN rcode = 3 THEN 1 ELSE 0 END) as NXDOMAIN, SUM(CASE WHEN rcode = 5 THEN 1 ELSE 0 END) as REFUSED, SUM(CASE WHEN rcode NOT IN (0,2,3,5) THEN 1 ELSE 0 END) as Other FROM dns_logs WHERE $__timeFilter(timestamp)",
+ "format": "table",
+ "refId": "A"
+ }
+ ]
+ },
+ {
+ "title": "Response Types",
+ "type": "piechart",
+ "datasource": { "type": "mysql", "uid": "technitium-mysql" },
+ "gridPos": { "h": 8, "w": 8, "x": 8, "y": 12 },
+ "fieldConfig": {
+ "defaults": {
+ "color": { "mode": "palette-classic" }
+ },
+ "overrides": [
+ { "matcher": { "id": "byName", "options": "Cached" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] },
+ { "matcher": { "id": "byName", "options": "Blocked" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] },
+ { "matcher": { "id": "byName", "options": "Recursive" }, "properties": [{ "id": "color", "value": { "fixedColor": "blue", "mode": "fixed" } }] },
+ { "matcher": { "id": "byName", "options": "Authoritative" }, "properties": [{ "id": "color", "value": { "fixedColor": "purple", "mode": "fixed" } }] }
+ ]
+ },
+ "options": {
+ "legend": { "displayMode": "table", "placement": "right", "values": ["value", "percent"] },
+ "pieType": "donut",
+ "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": true },
+ "tooltip": { "mode": "single" }
+ },
+ "targets": [
+ {
+ "rawSql": "SELECT SUM(CASE WHEN response_type = 1 THEN 1 ELSE 0 END) as Authoritative, SUM(CASE WHEN response_type = 2 THEN 1 ELSE 0 END) as Recursive, SUM(CASE WHEN response_type = 3 THEN 1 ELSE 0 END) as Cached, SUM(CASE WHEN response_type = 4 THEN 1 ELSE 0 END) as Blocked, SUM(CASE WHEN response_type = 5 THEN 1 ELSE 0 END) as Dropped FROM dns_logs WHERE $__timeFilter(timestamp)",
+ "format": "table",
+ "refId": "A"
+ }
+ ]
+ },
+ {
+ "title": "Query Types",
+ "type": "piechart",
+ "datasource": { "type": "mysql", "uid": "technitium-mysql" },
+ "gridPos": { "h": 8, "w": 8, "x": 16, "y": 12 },
+ "fieldConfig": {
+ "defaults": {
+ "color": { "mode": "palette-classic" }
+ },
+ "overrides": []
+ },
+ "options": {
+ "legend": { "displayMode": "table", "placement": "right", "values": ["value", "percent"] },
+ "pieType": "donut",
+ "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": true },
+ "tooltip": { "mode": "single" }
+ },
+ "targets": [
+ {
+ "rawSql": "SELECT SUM(CASE WHEN qtype = 1 THEN 1 ELSE 0 END) as A, SUM(CASE WHEN qtype = 28 THEN 1 ELSE 0 END) as AAAA, SUM(CASE WHEN qtype = 5 THEN 1 ELSE 0 END) as CNAME, SUM(CASE WHEN qtype = 15 THEN 1 ELSE 0 END) as MX, SUM(CASE WHEN qtype = 16 THEN 1 ELSE 0 END) as TXT, SUM(CASE WHEN qtype = 33 THEN 1 ELSE 0 END) as SRV, SUM(CASE WHEN qtype = 12 THEN 1 ELSE 0 END) as PTR, SUM(CASE WHEN qtype = 6 THEN 1 ELSE 0 END) as SOA, SUM(CASE WHEN qtype = 2 THEN 1 ELSE 0 END) as NS, SUM(CASE WHEN qtype = 65 THEN 1 ELSE 0 END) as HTTPS, SUM(CASE WHEN qtype NOT IN (1,2,5,6,12,15,16,28,33,65) THEN 1 ELSE 0 END) as Other FROM dns_logs WHERE $__timeFilter(timestamp)",
+ "format": "table",
+ "refId": "A"
+ }
+ ]
+ },
+ {
+ "title": "Top 20 Queried Domains",
+ "type": "table",
+ "datasource": { "type": "mysql", "uid": "technitium-mysql" },
+ "gridPos": { "h": 10, "w": 12, "x": 0, "y": 20 },
+ "fieldConfig": {
+ "defaults": {
+ "custom": { "filterable": true }
+ },
+ "overrides": [
+ { "matcher": { "id": "byName", "options": "count" }, "properties": [{ "id": "custom.width", "value": 100 }] }
+ ]
+ },
+ "options": {
+ "showHeader": true,
+ "sortBy": [{ "desc": true, "displayName": "count" }]
+ },
+ "targets": [
+ {
+ "rawSql": "SELECT qname as domain, COUNT(*) as count FROM dns_logs WHERE $__timeFilter(timestamp) GROUP BY qname ORDER BY count DESC LIMIT 20",
+ "format": "table",
+ "refId": "A"
+ }
+ ]
+ },
+ {
+ "title": "Top 20 Clients",
+ "type": "table",
+ "datasource": { "type": "mysql", "uid": "technitium-mysql" },
+ "gridPos": { "h": 10, "w": 12, "x": 12, "y": 20 },
+ "fieldConfig": {
+ "defaults": {
+ "custom": { "filterable": true }
+ },
+ "overrides": [
+ { "matcher": { "id": "byName", "options": "count" }, "properties": [{ "id": "custom.width", "value": 100 }] }
+ ]
+ },
+ "options": {
+ "showHeader": true,
+ "sortBy": [{ "desc": true, "displayName": "count" }]
+ },
+ "targets": [
+ {
+ "rawSql": "SELECT client_ip, COUNT(*) as count FROM dns_logs WHERE $__timeFilter(timestamp) GROUP BY client_ip ORDER BY count DESC LIMIT 20",
+ "format": "table",
+ "refId": "A"
+ }
+ ]
+ },
+ {
+ "title": "Average Response Time Over Time",
+ "type": "timeseries",
+ "datasource": { "type": "mysql", "uid": "technitium-mysql" },
+ "gridPos": { "h": 8, "w": 24, "x": 0, "y": 30 },
+ "fieldConfig": {
+ "defaults": {
+ "color": { "mode": "palette-classic" },
+ "unit": "ms",
+ "custom": {
+ "axisBorderShow": false,
+ "axisLabel": "Response Time (ms)",
+ "axisPlacement": "auto",
+ "drawStyle": "line",
+ "fillOpacity": 20,
+ "gradientMode": "none",
+ "lineWidth": 2,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": true
+ }
+ },
+ "overrides": []
+ },
+ "options": {
+ "legend": { "calcs": ["mean", "max"], "displayMode": "list", "placement": "bottom" },
+ "tooltip": { "mode": "multi", "sort": "desc" }
+ },
+ "targets": [
+ {
+ "rawSql": "SELECT $__timeGroup(timestamp, $__interval) as time, AVG(response_rtt) as avg_rtt, MAX(response_rtt) as max_rtt FROM dns_logs WHERE $__timeFilter(timestamp) AND response_rtt IS NOT NULL GROUP BY time ORDER BY time",
+ "format": "time_series",
+ "refId": "A"
+ }
+ ]
+ },
+ {
+ "title": "Top 20 NxDomain Domains",
+ "type": "table",
+ "datasource": { "type": "mysql", "uid": "technitium-mysql" },
+ "gridPos": { "h": 10, "w": 12, "x": 0, "y": 38 },
+ "fieldConfig": {
+ "defaults": {
+ "custom": { "filterable": true }
+ },
+ "overrides": [
+ { "matcher": { "id": "byName", "options": "count" }, "properties": [{ "id": "custom.width", "value": 100 }] }
+ ]
+ },
+ "options": {
+ "showHeader": true,
+ "sortBy": [{ "desc": true, "displayName": "count" }]
+ },
+ "targets": [
+ {
+ "rawSql": "SELECT qname as domain, COUNT(*) as count FROM dns_logs WHERE $__timeFilter(timestamp) AND rcode = 3 GROUP BY qname ORDER BY count DESC LIMIT 20",
+ "format": "table",
+ "refId": "A"
+ }
+ ]
+ },
+ {
+ "title": "Top 20 Blocked Domains",
+ "type": "table",
+ "datasource": { "type": "mysql", "uid": "technitium-mysql" },
+ "gridPos": { "h": 10, "w": 12, "x": 12, "y": 38 },
+ "fieldConfig": {
+ "defaults": {
+ "custom": { "filterable": true }
+ },
+ "overrides": [
+ { "matcher": { "id": "byName", "options": "count" }, "properties": [{ "id": "custom.width", "value": 100 }] }
+ ]
+ },
+ "options": {
+ "showHeader": true,
+ "sortBy": [{ "desc": true, "displayName": "count" }]
+ },
+ "targets": [
+ {
+ "rawSql": "SELECT qname as domain, COUNT(*) as count FROM dns_logs WHERE $__timeFilter(timestamp) AND response_type = 4 GROUP BY qname ORDER BY count DESC LIMIT 20",
+ "format": "table",
+ "refId": "A"
+ }
+ ]
+ }
+ ],
+ "refresh": "5m",
+ "schemaVersion": 39,
+ "tags": ["dns", "technitium", "mysql"],
+ "templating": { "list": [] },
+ "time": { "from": "now-24h", "to": "now" },
+ "timepicker": {},
+ "timezone": "",
+ "title": "Technitium DNS",
+ "uid": "technitium-dns",
+ "version": 1
+}
diff --git a/stacks/technitium/modules/technitium/ha.tf b/stacks/technitium/modules/technitium/ha.tf
new file mode 100644
index 00000000..8ad16b95
--- /dev/null
+++ b/stacks/technitium/modules/technitium/ha.tf
@@ -0,0 +1,278 @@
+# =============================================================================
+# Technitium DNS — High Availability (Primary-Secondary)
+# =============================================================================
+#
+# Secondary DNS instance replicates zones from primary via AXFR.
+# Both pods share the `dns-server=true` label so the DNS LoadBalancer
+# in main.tf routes queries to whichever pod is healthy.
+
+module "nfs_secondary_config" {
+ source = "../../../../modules/kubernetes/nfs_volume"
+ name = "technitium-secondary-config"
+ namespace = kubernetes_namespace.technitium.metadata[0].name
+ nfs_server = var.nfs_server
+ nfs_path = "/mnt/main/technitium-secondary"
+}
+
+# Primary-only service for zone transfers (AXFR) and API access
+resource "kubernetes_service" "technitium_primary" {
+ metadata {
+ name = "technitium-primary"
+ namespace = kubernetes_namespace.technitium.metadata[0].name
+ labels = {
+ "app" = "technitium"
+ }
+ }
+
+ spec {
+ selector = {
+ app = "technitium"
+ }
+ port {
+ name = "dns-tcp"
+ port = 53
+ protocol = "TCP"
+ }
+ port {
+ name = "dns-udp"
+ port = 53
+ protocol = "UDP"
+ }
+ port {
+ name = "api"
+ port = 5380
+ protocol = "TCP"
+ }
+ }
+}
+
+# Secondary DNS deployment — zone-transfer replica
+resource "kubernetes_deployment" "technitium_secondary" {
+ metadata {
+ name = "technitium-secondary"
+ namespace = kubernetes_namespace.technitium.metadata[0].name
+ labels = {
+ app = "technitium-secondary"
+ tier = var.tier
+ }
+ }
+ spec {
+ replicas = 1
+ strategy {
+ type = "RollingUpdate"
+ rolling_update {
+ max_unavailable = "0"
+ max_surge = "1"
+ }
+ }
+ selector {
+ match_labels = {
+ app = "technitium-secondary"
+ }
+ }
+ template {
+ metadata {
+ labels = {
+ app = "technitium-secondary"
+ "dns-server" = "true"
+ }
+ }
+ spec {
+ affinity {
+ pod_anti_affinity {
+ required_during_scheduling_ignored_during_execution {
+ label_selector {
+ match_expressions {
+ key = "dns-server"
+ operator = "In"
+ values = ["true"]
+ }
+ }
+ topology_key = "kubernetes.io/hostname"
+ }
+ }
+ }
+ container {
+ image = "technitium/dns-server:latest"
+ name = "technitium"
+ env {
+ name = "DNS_SERVER_ADMIN_PASSWORD"
+ value = var.technitium_password
+ }
+ env {
+ name = "DNS_SERVER_ENABLE_BLOCKING"
+ value = "true"
+ }
+ resources {
+ requests = {
+ cpu = "25m"
+ memory = "512Mi"
+ }
+ limits = {
+ memory = "512Mi"
+ }
+ }
+ port {
+ container_port = 5380
+ }
+ port {
+ container_port = 53
+ }
+ port {
+ container_port = 80
+ }
+ liveness_probe {
+ tcp_socket {
+ port = 53
+ }
+ initial_delay_seconds = 10
+ period_seconds = 10
+ }
+ readiness_probe {
+ tcp_socket {
+ port = 53
+ }
+ initial_delay_seconds = 5
+ period_seconds = 5
+ }
+ volume_mount {
+ mount_path = "/etc/dns"
+ name = "nfs-config"
+ }
+ }
+ volume {
+ name = "nfs-config"
+ persistent_volume_claim {
+ claim_name = module.nfs_secondary_config.claim_name
+ }
+ }
+ dns_config {
+ option {
+ name = "ndots"
+ value = "2"
+ }
+ }
+ }
+ }
+ }
+}
+
+# Secondary web service — internal only, used by setup Job
+resource "kubernetes_service" "technitium_secondary_web" {
+ metadata {
+ name = "technitium-secondary-web"
+ namespace = kubernetes_namespace.technitium.metadata[0].name
+ labels = {
+ "app" = "technitium-secondary"
+ }
+ }
+
+ spec {
+ selector = {
+ app = "technitium-secondary"
+ }
+ port {
+ name = "api"
+ port = 5380
+ protocol = "TCP"
+ }
+ }
+}
+
+# PodDisruptionBudget — keep at least 1 DNS pod running during voluntary disruptions
+resource "kubernetes_pod_disruption_budget_v1" "technitium_dns" {
+ metadata {
+ name = "technitium-dns"
+ namespace = kubernetes_namespace.technitium.metadata[0].name
+ }
+ spec {
+ min_available = "1"
+ selector {
+ match_labels = {
+ "dns-server" = "true"
+ }
+ }
+ }
+}
+
+# Setup Job — configures secondary zones via Technitium REST API
+resource "kubernetes_job" "technitium_secondary_setup" {
+ metadata {
+ name = "technitium-secondary-setup"
+ namespace = kubernetes_namespace.technitium.metadata[0].name
+ }
+ spec {
+ backoff_limit = 5
+ template {
+ metadata {}
+ spec {
+ restart_policy = "OnFailure"
+ container {
+ name = "setup"
+ image = "curlimages/curl:latest"
+ command = ["/bin/sh", "-c", <<-SCRIPT
+ set -e
+ PRIMARY="http://technitium-primary.technitium.svc.cluster.local:5380"
+ SECONDARY="http://technitium-secondary-web.technitium.svc.cluster.local:5380"
+
+ # Wait for both to be ready
+ until curl -sf "$PRIMARY/api/user/login?user=$TECH_USER&pass=$TECH_PASS" -o /tmp/p.json; do echo "Waiting for primary..."; sleep 5; done
+ until curl -sf "$SECONDARY/api/user/login?user=$TECH_USER&pass=$TECH_PASS" -o /tmp/s.json; do echo "Waiting for secondary..."; sleep 5; done
+ P_TOKEN=$(cat /tmp/p.json | sed -n 's/.*"token":"\([^"]*\)".*/\1/p')
+ S_TOKEN=$(cat /tmp/s.json | sed -n 's/.*"token":"\([^"]*\)".*/\1/p')
+
+ # Get zones from primary (split JSON into lines so sed can match each zone)
+ curl -sf "$PRIMARY/api/zones/list?token=$P_TOKEN" | tr ',' '\n' | sed -n 's/.*"name":"\([^"]*\)".*/\1/p' > /tmp/zones.txt
+ echo "Found zones:"; cat /tmp/zones.txt
+
+ # Enable zone transfers on primary for each zone
+ while read -r zone; do
+ echo "Enabling zone transfer for: $zone"
+ curl -sf "$PRIMARY/api/zones/options/set?token=$P_TOKEN&zone=$zone&zoneTransfer=Allow" || true
+ done < /tmp/zones.txt
+
+ # Create secondary zones on secondary instance (ignore "already exists" errors)
+ while read -r zone; do
+ echo "Creating secondary zone: $zone"
+ curl -sf "$SECONDARY/api/zones/create?token=$S_TOKEN&zone=$zone&type=Secondary&primaryNameServerAddresses=$PRIMARY_IP" || true
+ done < /tmp/zones.txt
+
+ # Force resync all secondary zones to pull latest data
+ while read -r zone; do
+ echo "Resyncing: $zone"
+ curl -sf "$SECONDARY/api/zones/resync?token=$S_TOKEN&zone=$zone" || true
+ done < /tmp/zones.txt
+
+ echo "Secondary zone setup complete"
+ SCRIPT
+ ]
+ env {
+ name = "TECH_USER"
+ value = var.technitium_username
+ }
+ env {
+ name = "TECH_PASS"
+ value = var.technitium_password
+ }
+ env {
+ name = "PRIMARY_IP"
+ value = kubernetes_service.technitium_primary.spec[0].cluster_ip
+ }
+ }
+ dns_config {
+ option {
+ name = "ndots"
+ value = "2"
+ }
+ }
+ }
+ }
+ }
+
+ depends_on = [
+ kubernetes_deployment.technitium,
+ kubernetes_deployment.technitium_secondary,
+ kubernetes_service.technitium_primary,
+ kubernetes_service.technitium_secondary_web,
+ ]
+}
diff --git a/stacks/technitium/modules/technitium/main.tf b/stacks/technitium/modules/technitium/main.tf
new file mode 100644
index 00000000..34d90a81
--- /dev/null
+++ b/stacks/technitium/modules/technitium/main.tf
@@ -0,0 +1,356 @@
+variable "tls_secret_name" {}
+variable "tier" { type = string }
+variable "homepage_token" {}
+variable "technitium_db_password" {}
+variable "nfs_server" { type = string }
+variable "mysql_host" { type = string }
+variable "technitium_username" { type = string }
+variable "technitium_password" {
+ type = string
+ sensitive = true
+}
+
+resource "kubernetes_namespace" "technitium" {
+ metadata {
+ name = "technitium"
+ labels = {
+ tier = var.tier
+ }
+ # stale cache error when trying to resolve
+ # labels = {
+ # "istio-injection" : "enabled"
+ # }
+ }
+}
+
+module "tls_secret" {
+ source = "../../../../modules/kubernetes/setup_tls_secret"
+ namespace = kubernetes_namespace.technitium.metadata[0].name
+ tls_secret_name = var.tls_secret_name
+}
+
+# CoreDNS Corefile - manages cluster DNS resolution
+# The viktorbarzin.lan block forwards to Technitium via LoadBalancer.
+# A template regex in the viktorbarzin.lan block short-circuits junk queries
+# caused by ndots:5 search domain expansion (e.g. www.cloudflare.com.viktorbarzin.lan,
+# redis.redis.svc.cluster.local.viktorbarzin.lan) by returning NXDOMAIN for any
+# query with 2+ labels before .viktorbarzin.lan. Legitimate single-label queries
+# (e.g. idrac.viktorbarzin.lan) fall through to Technitium.
+resource "kubernetes_config_map" "coredns" {
+ metadata {
+ name = "coredns"
+ namespace = "kube-system"
+ }
+
+ data = {
+ Corefile = <<-EOF
+ .:53 {
+ #log
+ errors
+ health {
+ lameduck 5s
+ }
+ ready
+ kubernetes cluster.local in-addr.arpa ip6.arpa {
+ pods insecure
+ fallthrough in-addr.arpa ip6.arpa
+ ttl 30
+ }
+ prometheus :9153
+ forward . 8.8.8.8 1.1.1.1 10.0.20.1
+ cache {
+ success 10000 300 6
+ denial 10000 300 60
+ }
+ loop
+ reload
+ loadbalance
+ }
+ viktorbarzin.lan:53 {
+ #log
+ errors
+ template ANY ANY viktorbarzin.lan {
+ match ".*\..*\.viktorbarzin\.lan\.$"
+ rcode NXDOMAIN
+ fallthrough
+ }
+ forward . 10.0.20.204 # Technitium LoadBalancer
+ cache {
+ success 10000 300 6
+ denial 10000 300 60
+ }
+ }
+ EOF
+ }
+}
+
+module "nfs_config" {
+ source = "../../../../modules/kubernetes/nfs_volume"
+ name = "technitium-config"
+ namespace = kubernetes_namespace.technitium.metadata[0].name
+ nfs_server = var.nfs_server
+ nfs_path = "/mnt/main/technitium"
+}
+
+resource "kubernetes_deployment" "technitium" {
+ # resource "kubernetes_daemonset" "technitium" {
+ metadata {
+ name = "technitium"
+ namespace = kubernetes_namespace.technitium.metadata[0].name
+ labels = {
+ app = "technitium"
+ tier = var.tier
+ }
+ }
+ spec {
+ strategy {
+ type = "RollingUpdate"
+ rolling_update {
+ max_unavailable = "0"
+ max_surge = "1"
+ }
+ }
+ # replicas = 1
+ selector {
+ match_labels = {
+ app = "technitium"
+ }
+ }
+ template {
+ metadata {
+ annotations = {
+ "diun.enable" = "false"
+ # "diun.include_tags" = "^\\d+(?:\\.\\d+)?(?:\\.\\d+)?$"
+ "diun.include_tags" = "latest"
+ }
+ labels = {
+ app = "technitium"
+ "dns-server" = "true"
+ }
+ }
+ spec {
+ affinity {
+ # Prefer nodes running Traefik for network locality
+ pod_affinity {
+ preferred_during_scheduling_ignored_during_execution {
+ weight = 100
+ pod_affinity_term {
+ label_selector {
+ match_expressions {
+ key = "app.kubernetes.io/name"
+ operator = "In"
+ values = ["traefik"]
+ }
+ }
+ topology_key = "kubernetes.io/hostname"
+ }
+ }
+ }
+ # Spread DNS pods across nodes for HA
+ pod_anti_affinity {
+ required_during_scheduling_ignored_during_execution {
+ label_selector {
+ match_expressions {
+ key = "dns-server"
+ operator = "In"
+ values = ["true"]
+ }
+ }
+ topology_key = "kubernetes.io/hostname"
+ }
+ }
+ }
+ container {
+ image = "technitium/dns-server:latest"
+ name = "technitium"
+ resources {
+ requests = {
+ cpu = "25m"
+ memory = "512Mi"
+ }
+ limits = {
+ memory = "512Mi"
+ }
+ }
+ port {
+ container_port = 5380
+ }
+ port {
+ container_port = 53
+ }
+ port {
+ container_port = 80
+ }
+ liveness_probe {
+ tcp_socket {
+ port = 53
+ }
+ initial_delay_seconds = 10
+ period_seconds = 10
+ }
+ readiness_probe {
+ tcp_socket {
+ port = 53
+ }
+ initial_delay_seconds = 5
+ period_seconds = 5
+ }
+ volume_mount {
+ mount_path = "/etc/dns"
+ name = "nfs-config"
+ }
+ volume_mount {
+ mount_path = "/etc/tls/"
+ name = "tls-cert"
+ }
+ }
+ volume {
+ name = "nfs-config"
+ persistent_volume_claim {
+ claim_name = module.nfs_config.claim_name
+ }
+ }
+ volume {
+ name = "tls-cert"
+ secret {
+ secret_name = var.tls_secret_name
+ }
+ }
+ dns_config {
+ option {
+ name = "ndots"
+ value = "2"
+ }
+ }
+ }
+ }
+ }
+}
+
+resource "kubernetes_service" "technitium-web" {
+ metadata {
+ name = "technitium-web"
+ namespace = kubernetes_namespace.technitium.metadata[0].name
+ labels = {
+ "app" = "technitium"
+ }
+ # annotations = {
+ # "metallb.universe.tf/allow-shared-ip" : "shared"
+ # }
+ }
+
+ spec {
+ # type = "LoadBalancer"
+ # external_traffic_policy = "Cluster"
+ selector = {
+ app = "technitium"
+ }
+ port {
+ name = "technitium-dns"
+ port = "5380"
+ protocol = "TCP"
+ }
+ port {
+ name = "technitium-doh"
+ port = "80"
+ protocol = "TCP"
+ }
+ }
+}
+
+resource "kubernetes_service" "technitium-dns" {
+ metadata {
+ name = "technitium-dns"
+ namespace = kubernetes_namespace.technitium.metadata[0].name
+ labels = {
+ "app" = "technitium"
+ }
+ }
+
+ spec {
+ type = "LoadBalancer"
+ port {
+ name = "technitium-dns"
+ port = 53 # NOTE(review): only UDP 53 is exposed via this LB; TCP 53 (truncated responses, AXFR) is not load-balanced — confirm intended
+ protocol = "UDP"
+ }
+ external_traffic_policy = "Local"
+ selector = {
+ "dns-server" = "true"
+ }
+ }
+}
+module "ingress" {
+ source = "../../../../modules/kubernetes/ingress_factory"
+ namespace = kubernetes_namespace.technitium.metadata[0].name
+ name = "technitium"
+ tls_secret_name = var.tls_secret_name
+ port = 5380
+ service_name = "technitium-web"
+ extra_annotations = {
+ "gethomepage.dev/enabled" = "true"
+ "gethomepage.dev/description" = "Internal DNS Server and Recursive Resolver"
+ "gethomepage.dev/group" = "Infrastructure"
+ "gethomepage.dev/icon" : "technitium.png"
+ "gethomepage.dev/name" = "Technitium"
+ "gethomepage.dev/widget.type" = "technitium"
+ "gethomepage.dev/widget.url" = "http://technitium-web.technitium.svc.cluster.local:5380"
+ "gethomepage.dev/widget.key" = var.homepage_token
+
+ "gethomepage.dev/widget.range" = "LastWeek"
+ "gethomepage.dev/widget.fields" = "[\"totalQueries\", \"totalCached\", \"totalBlocked\", \"totalRecursive\"]"
+ "gethomepage.dev/pod-selector" = ""
+ }
+}
+
+module "ingress-doh" {
+ source = "../../../../modules/kubernetes/ingress_factory"
+ namespace = kubernetes_namespace.technitium.metadata[0].name
+ name = "technitium-doh"
+ tls_secret_name = var.tls_secret_name
+ host = "dns"
+ service_name = "technitium-web"
+}
+
+# Grafana datasource for Technitium DNS query logs in MySQL
+resource "kubernetes_config_map" "grafana_technitium_datasource" {
+ metadata {
+ name = "grafana-technitium-datasource"
+ namespace = "monitoring"
+ labels = {
+ grafana_datasource = "1"
+ }
+ }
+ data = {
+ "technitium-datasource.yaml" = yamlencode({
+ apiVersion = 1
+ datasources = [{
+ name = "Technitium MySQL"
+ type = "mysql"
+ access = "proxy"
+ url = "${var.mysql_host}:3306"
+ database = "technitium"
+ user = "technitium"
+ uid = "technitium-mysql"
+ secureJsonData = {
+ password = var.technitium_db_password
+ }
+ }]
+ })
+ }
+}
+
+# Grafana dashboard for Technitium DNS query logs
+resource "kubernetes_config_map" "grafana_technitium_dashboard" {
+ metadata {
+ name = "grafana-technitium-dashboard"
+ namespace = "monitoring"
+ labels = {
+ grafana_dashboard = "1"
+ }
+ }
+ data = {
+ "technitium-dns.json" = file("${path.module}/dashboards/technitium-dns.json")
+ }
+}
+
diff --git a/stacks/technitium/secrets b/stacks/technitium/secrets
new file mode 120000
index 00000000..ca54a7cf
--- /dev/null
+++ b/stacks/technitium/secrets
@@ -0,0 +1 @@
+../../secrets
\ No newline at end of file
diff --git a/stacks/technitium/terragrunt.hcl b/stacks/technitium/terragrunt.hcl
new file mode 100644
index 00000000..4f16dddf
--- /dev/null
+++ b/stacks/technitium/terragrunt.hcl
@@ -0,0 +1,8 @@
+include "root" {
+ path = find_in_parent_folders()
+}
+
+dependency "infra" {
+ config_path = "../infra"
+ skip_outputs = true
+}
diff --git a/stacks/technitium/tiers.tf b/stacks/technitium/tiers.tf
new file mode 100644
index 00000000..eb0f8083
--- /dev/null
+++ b/stacks/technitium/tiers.tf
@@ -0,0 +1,10 @@
+# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa
+locals {
+ tiers = {
+ core = "0-core"
+ cluster = "1-cluster"
+ gpu = "2-gpu"
+ edge = "3-edge"
+ aux = "4-aux"
+ }
+}
diff --git a/stacks/traefik/main.tf b/stacks/traefik/main.tf
new file mode 100644
index 00000000..c87cba71
--- /dev/null
+++ b/stacks/traefik/main.tf
@@ -0,0 +1,16 @@
+variable "tls_secret_name" { type = string }
+variable "redis_host" { type = string }
+
+data "vault_kv_secret_v2" "secrets" {
+ mount = "secret"
+ name = "platform"
+}
+
+module "traefik" {
+ source = "./modules/traefik"
+ tier = local.tiers.core
+ crowdsec_api_key = data.vault_kv_secret_v2.secrets.data["ingress_crowdsec_api_key"]
+ redis_host = var.redis_host
+ tls_secret_name = var.tls_secret_name
+ auth_fallback_htpasswd = data.vault_kv_secret_v2.secrets.data["auth_fallback_htpasswd"]
+}
diff --git a/stacks/traefik/modules/traefik/main.tf b/stacks/traefik/modules/traefik/main.tf
new file mode 100644
index 00000000..6428322c
--- /dev/null
+++ b/stacks/traefik/modules/traefik/main.tf
@@ -0,0 +1,629 @@
+variable "tier" { type = string }
+variable "crowdsec_api_key" {
+ type = string
+ sensitive = true
+}
+variable "redis_host" { type = string }
+variable "tls_secret_name" {}
+variable "auth_fallback_htpasswd" {
+ type = string
+ description = "htpasswd-format string for emergency basicAuth fallback when Authentik is down"
+ sensitive = true
+}
+
+resource "kubernetes_namespace" "traefik" {
+ metadata {
+ name = "traefik"
+ labels = {
+ "app.kubernetes.io/name" = "traefik"
+ "app.kubernetes.io/instance" = "traefik"
+ tier = var.tier
+ }
+ }
+}
+
+resource "helm_release" "traefik" {
+ namespace = kubernetes_namespace.traefik.metadata[0].name
+ create_namespace = false
+ name = "traefik"
+ repository = "https://traefik.github.io/charts"
+ chart = "traefik"
+ atomic = true
+ timeout = 600
+
+ values = [yamlencode({
+ deployment = {
+ replicas = 3
+ podAnnotations = {
+ "diun.enable" = "true"
+ "diun.include_tags" = "^v\\d+(?:\\.\\d+)?(?:\\.\\d+)?.*$"
+ }
+ initContainers = [{
+ name = "download-plugins"
+ image = "alpine:3"
+ command = ["sh", "-c", join("", [
+ "set -e; ",
+ "STORAGE=/plugins-storage; ",
+ "mkdir -p \"$STORAGE/archives/github.com/maxlerebourg/crowdsec-bouncer-traefik-plugin\"; ",
+ "mkdir -p \"$STORAGE/archives/github.com/packruler/rewrite-body\"; ",
+ "wget -q -T 30 -O \"$STORAGE/archives/github.com/maxlerebourg/crowdsec-bouncer-traefik-plugin/v1.4.2.zip\" ",
+ "\"https://github.com/maxlerebourg/crowdsec-bouncer-traefik-plugin/archive/refs/tags/v1.4.2.zip\"; ",
+ "wget -q -T 30 -O \"$STORAGE/archives/github.com/packruler/rewrite-body/v1.2.0.zip\" ",
+ "\"https://github.com/packruler/rewrite-body/archive/refs/tags/v1.2.0.zip\"; ",
+ "printf '{\"github.com/maxlerebourg/crowdsec-bouncer-traefik-plugin\":\"v1.4.2\",\"github.com/packruler/rewrite-body\":\"v1.2.0\"}' ",
+ "> \"$STORAGE/archives/state.json\"; ",
+ "echo \"Plugins pre-downloaded successfully\"",
+ ])]
+ volumeMounts = [{
+ name = "plugins"
+ mountPath = "/plugins-storage"
+ }]
+ }]
+ }
+
+ updateStrategy = {
+ type = "RollingUpdate"
+ rollingUpdate = {
+ maxUnavailable = 0
+ maxSurge = 1
+ }
+ }
+
+ ingressClass = {
+ enabled = true
+ isDefaultClass = true
+ }
+
+ providers = {
+ kubernetesIngress = {
+ enabled = true
+ allowExternalNameServices = true
+ publishedService = { enabled = true }
+ }
+ kubernetesCRD = {
+ enabled = true
+ allowExternalNameServices = true
+ allowCrossNamespace = true
+ }
+ }
+
+ # Enable dashboard API (accessible on port 8080 internally)
+ api = {
+ insecure = false
+ }
+
+ # Entrypoints
+ ports = {
+ web = {
+ port = 8000
+ exposedPort = 80
+ protocol = "TCP"
+ http = {
+ redirections = {
+ entryPoint = {
+ to = "websecure"
+ scheme = "https"
+ }
+ }
+ }
+ }
+ websecure = {
+ port = 8443
+ exposedPort = 443
+ protocol = "TCP"
+ http = {
+ tls = {
+ enabled = true
+ }
+ }
+ http3 = {
+ enabled = true
+ advertisedPort = 443
+ }
+ }
+ whisper-tcp = {
+ port = 10300
+ exposedPort = 10300
+ protocol = "TCP"
+ expose = { default = true }
+ }
+ piper-tcp = {
+ port = 10200
+ exposedPort = 10200
+ protocol = "TCP"
+ expose = { default = true }
+ }
+ ollama-tcp = {
+ port = 11434
+ exposedPort = 11434
+ protocol = "TCP"
+ expose = { default = true }
+ }
+ }
+
+ service = {
+ type = "LoadBalancer"
+ annotations = {
+ "metallb.universe.tf/loadBalancerIPs" = "10.0.20.202"
+ }
+ spec = {
+ externalTrafficPolicy = "Local"
+ }
+ }
+
+ # Plugins
+ experimental = {
+ plugins = {
+ crowdsec-bouncer = {
+ moduleName = "github.com/maxlerebourg/crowdsec-bouncer-traefik-plugin"
+ version = "v1.4.2"
+ }
+ rewrite-body = {
+ moduleName = "github.com/packruler/rewrite-body"
+ version = "v1.2.0"
+ }
+ }
+ }
+
+ # Prometheus metrics
+ metrics = {
+ prometheus = {
+ entryPoint = "metrics"
+ addEntryPointsLabels = true
+ addServicesLabels = true
+ addRoutersLabels = true
+ }
+ }
+
+ # Access logs
+ logs = {
+ access = {
+ enabled = true
+ }
+ }
+
+ additionalArguments = [
+ "--global.checknewversion=false",
+ "--global.sendanonymoususage=false",
+ # Skip TLS verification for self-signed backend certs (proxmox, idrac, etc.)
+ "--serversTransport.insecureSkipVerify=true",
+ # Increase timeouts for services like Immich
+ "--serversTransport.forwardingTimeouts.dialTimeout=60s",
+ "--serversTransport.forwardingTimeouts.responseHeaderTimeout=30s",
+ "--serversTransport.forwardingTimeouts.idleConnTimeout=90s",
+ # Use forwarded headers from trusted proxies
+ "--entryPoints.websecure.forwardedHeaders.insecure=false",
+ "--entryPoints.web.forwardedHeaders.insecure=false",
+ "--entryPoints.websecure.forwardedHeaders.trustedIPs=173.245.48.0/20,103.21.244.0/22,103.22.200.0/22,103.31.4.0/22,141.101.64.0/18,108.162.192.0/18,190.93.240.0/20,188.114.96.0/20,197.234.240.0/22,198.41.128.0/17,162.158.0.0/15,104.16.0.0/13,104.24.0.0/14,172.64.0.0/13,131.0.72.0/22,10.0.0.0/8,192.168.0.0/16",
+ "--entryPoints.web.forwardedHeaders.trustedIPs=173.245.48.0/20,103.21.244.0/22,103.22.200.0/22,103.31.4.0/22,141.101.64.0/18,108.162.192.0/18,190.93.240.0/20,188.114.96.0/20,197.234.240.0/22,198.41.128.0/17,162.158.0.0/15,104.16.0.0/13,104.24.0.0/14,172.64.0.0/13,131.0.72.0/22,10.0.0.0/8,192.168.0.0/16",
+ ]
+
+ resources = {
+ requests = {
+ cpu = "100m"
+ memory = "384Mi"
+ }
+ limits = {
+ memory = "384Mi"
+ }
+ }
+
+ nodeSelector = {
+ "kubernetes.io/os" = "linux"
+ }
+
+ tolerations = []
+
+ topologySpreadConstraints = [{
+ maxSkew = 1
+ topologyKey = "kubernetes.io/hostname"
+ whenUnsatisfiable = "DoNotSchedule"
+ labelSelector = {
+ matchLabels = {
+ "app.kubernetes.io/name" = "traefik"
+ }
+ }
+ }]
+
+ podDisruptionBudget = {
+ enabled = true
+ minAvailable = 2
+ }
+ })]
+}
+
+# Dashboard resources
+module "tls_secret" {
+ source = "../../../../modules/kubernetes/setup_tls_secret"
+ namespace = kubernetes_namespace.traefik.metadata[0].name
+ tls_secret_name = var.tls_secret_name
+}
+
+resource "kubernetes_service" "traefik_dashboard" {
+ metadata {
+ name = "traefik-dashboard"
+ namespace = kubernetes_namespace.traefik.metadata[0].name
+ labels = {
+ "app" = "traefik-dashboard"
+ }
+ }
+
+ spec {
+ selector = {
+ "app.kubernetes.io/name" = "traefik"
+ }
+ port {
+ name = "http"
+ port = 8080
+ target_port = 8080 # NOTE(review): with api.insecure=false the chart serves the dashboard on the 'traefik' entrypoint (9000), not 8080 — verify this target
+ protocol = "TCP"
+ }
+ }
+}
+
+module "ingress" {
+ source = "../../../../modules/kubernetes/ingress_factory"
+ namespace = kubernetes_namespace.traefik.metadata[0].name
+ name = "traefik"
+ service_name = "traefik-dashboard"
+ host = "traefik"
+ port = 8080
+ tls_secret_name = var.tls_secret_name
+ protected = true
+ extra_annotations = {
+ "gethomepage.dev/enabled" = "true"
+ "gethomepage.dev/name" = "Traefik"
+ "gethomepage.dev/description" = "Reverse proxy & ingress"
+ "gethomepage.dev/icon" = "traefik.png"
+ "gethomepage.dev/group" = "Core Platform"
+ "gethomepage.dev/pod-selector" = ""
+ }
+}
+
+# Bot-block resilience proxy: nginx reverse proxy in front of Poison Fountain
+# Returns 200 (allow all traffic) if Poison Fountain is unreachable (fail-open)
+resource "kubernetes_config_map" "bot_block_proxy_config" {
+ metadata {
+ name = "bot-block-proxy-config"
+ namespace = kubernetes_namespace.traefik.metadata[0].name
+ }
+
+ data = {
+ "default.conf" = <<-EOT
+ upstream poison_fountain {
+ server poison-fountain.poison-fountain.svc.cluster.local:8080;
+ }
+ server {
+ listen 8080;
+ location /auth {
+ proxy_pass http://poison_fountain;
+ proxy_connect_timeout 3s;
+ proxy_read_timeout 5s;
+ proxy_send_timeout 5s;
+ proxy_intercept_errors on;
+ error_page 502 503 504 =200 /fallback-allow;
+ proxy_set_header Host $host;
+ proxy_set_header X-Real-IP $remote_addr;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ proxy_set_header X-Forwarded-Proto $scheme;
+ }
+ location = /fallback-allow {
+ internal;
+ return 200 "allowed";
+ }
+ location /healthz {
+ access_log off;
+ return 200 "ok";
+ }
+ }
+ EOT
+ }
+}
+
+resource "kubernetes_deployment" "bot_block_proxy" {
+ metadata {
+ name = "bot-block-proxy"
+ namespace = kubernetes_namespace.traefik.metadata[0].name
+ labels = {
+ app = "bot-block-proxy"
+ }
+ }
+
+ spec {
+ replicas = 2
+ strategy {
+ type = "RollingUpdate"
+ rolling_update {
+ max_unavailable = 0
+ max_surge = 1
+ }
+ }
+ selector {
+ match_labels = {
+ app = "bot-block-proxy"
+ }
+ }
+ template {
+ metadata {
+ labels = {
+ app = "bot-block-proxy"
+ }
+ }
+ spec {
+ topology_spread_constraint {
+ max_skew = 1
+ topology_key = "kubernetes.io/hostname"
+ when_unsatisfiable = "DoNotSchedule"
+ label_selector {
+ match_labels = {
+ app = "bot-block-proxy"
+ }
+ }
+ }
+ container {
+ name = "nginx"
+ image = "nginx:1-alpine"
+
+ port {
+ container_port = 8080
+ }
+
+ volume_mount {
+ name = "config"
+ mount_path = "/etc/nginx/conf.d"
+ read_only = true
+ }
+
+ liveness_probe {
+ http_get {
+ path = "/healthz"
+ port = 8080
+ }
+ initial_delay_seconds = 3
+ period_seconds = 10
+ }
+ readiness_probe {
+ http_get {
+ path = "/healthz"
+ port = 8080
+ }
+ initial_delay_seconds = 2
+ period_seconds = 5
+ }
+
+ resources {
+ requests = {
+ cpu = "5m"
+ memory = "64Mi"
+ }
+ limits = {
+ memory = "64Mi"
+ }
+ }
+ }
+
+ volume {
+ name = "config"
+ config_map {
+ name = kubernetes_config_map.bot_block_proxy_config.metadata[0].name
+ }
+ }
+ }
+ }
+ }
+}
+
+resource "kubernetes_service" "bot_block_proxy" {
+ metadata {
+ name = "bot-block-proxy"
+ namespace = kubernetes_namespace.traefik.metadata[0].name
+ labels = {
+ app = "bot-block-proxy"
+ }
+ }
+
+ spec {
+ selector = {
+ app = "bot-block-proxy"
+ }
+ port {
+ name = "http"
+ port = 8080
+ target_port = 8080
+ }
+ }
+}
+
+# Resilience proxy for Authentik ForwardAuth
+# Falls back to basicAuth when Authentik is unreachable
+resource "kubernetes_secret" "auth_proxy_htpasswd" {
+ metadata {
+ name = "auth-proxy-htpasswd"
+ namespace = kubernetes_namespace.traefik.metadata[0].name
+ }
+
+ data = {
+ "htpasswd" = var.auth_fallback_htpasswd
+ }
+}
+
+resource "kubernetes_config_map" "auth_proxy_config" {
+ metadata {
+ name = "auth-proxy-config"
+ namespace = kubernetes_namespace.traefik.metadata[0].name
+ }
+
+ data = {
+ "default.conf" = <<-EOT
+ upstream authentik {
+ server ak-outpost-authentik-embedded-outpost.authentik.svc.cluster.local:9000;
+ }
+ server {
+ listen 9000;
+
+ location /outpost.goauthentik.io/auth/traefik {
+ proxy_pass http://authentik;
+ proxy_connect_timeout 3s;
+ proxy_read_timeout 5s;
+ proxy_send_timeout 5s;
+ proxy_intercept_errors on;
+ error_page 502 503 504 = @fallback_auth;
+ proxy_set_header Host $host;
+ proxy_set_header X-Real-IP $remote_addr;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ proxy_set_header X-Forwarded-Proto $scheme;
+ proxy_set_header X-Original-URL $scheme://$http_host$request_uri;
+ }
+
+ location @fallback_auth {
+ auth_basic "Emergency Access";
+ auth_basic_user_file /etc/nginx/htpasswd;
+ add_header X-authentik-username $remote_user always;
+ add_header X-Auth-Fallback "true" always;
+ return 200;
+ }
+
+ location /outpost.goauthentik.io/ {
+ proxy_pass http://authentik;
+ proxy_connect_timeout 3s;
+ proxy_read_timeout 10s;
+ proxy_set_header Host $host;
+ proxy_set_header X-Real-IP $remote_addr;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ proxy_set_header X-Forwarded-Proto $scheme;
+ }
+
+ location /healthz {
+ access_log off;
+ return 200 "ok";
+ }
+ }
+ EOT
+ }
+}
+
+resource "kubernetes_deployment" "auth_proxy" {
+ metadata {
+ name = "auth-proxy"
+ namespace = kubernetes_namespace.traefik.metadata[0].name
+ labels = {
+ app = "auth-proxy"
+ }
+ }
+
+ spec {
+ replicas = 2
+ strategy {
+ type = "RollingUpdate"
+ rolling_update {
+ max_unavailable = 0
+ max_surge = 1
+ }
+ }
+ selector {
+ match_labels = {
+ app = "auth-proxy"
+ }
+ }
+ template {
+ metadata {
+ labels = {
+ app = "auth-proxy"
+ }
+ }
+ spec {
+ topology_spread_constraint {
+ max_skew = 1
+ topology_key = "kubernetes.io/hostname"
+ when_unsatisfiable = "DoNotSchedule"
+ label_selector {
+ match_labels = {
+ app = "auth-proxy"
+ }
+ }
+ }
+ container {
+ name = "nginx"
+ image = "nginx:1-alpine"
+
+ port {
+ container_port = 9000
+ }
+
+ volume_mount {
+ name = "config"
+ mount_path = "/etc/nginx/conf.d"
+ read_only = true
+ }
+ volume_mount {
+ name = "htpasswd"
+ mount_path = "/etc/nginx/htpasswd"
+ sub_path = "htpasswd"
+ read_only = true
+ }
+
+ liveness_probe {
+ http_get {
+ path = "/healthz"
+ port = 9000
+ }
+ initial_delay_seconds = 3
+ period_seconds = 10
+ }
+ readiness_probe {
+ http_get {
+ path = "/healthz"
+ port = 9000
+ }
+ initial_delay_seconds = 2
+ period_seconds = 5
+ }
+
+ resources {
+ requests = {
+ cpu = "5m"
+ memory = "64Mi"
+ }
+ limits = {
+ memory = "64Mi"
+ }
+ }
+ }
+
+ volume {
+ name = "config"
+ config_map {
+ name = kubernetes_config_map.auth_proxy_config.metadata[0].name
+ }
+ }
+ volume {
+ name = "htpasswd"
+ secret {
+ secret_name = kubernetes_secret.auth_proxy_htpasswd.metadata[0].name
+ }
+ }
+ }
+ }
+ }
+}
+
+resource "kubernetes_service" "auth_proxy" {
+ metadata {
+ name = "auth-proxy"
+ namespace = kubernetes_namespace.traefik.metadata[0].name
+ labels = {
+ app = "auth-proxy"
+ }
+ }
+
+ spec {
+ selector = {
+ app = "auth-proxy"
+ }
+ port {
+ name = "http"
+ port = 9000
+ target_port = 9000
+ }
+ }
+}
diff --git a/stacks/traefik/modules/traefik/middleware.tf b/stacks/traefik/modules/traefik/middleware.tf
new file mode 100644
index 00000000..6e720e09
--- /dev/null
+++ b/stacks/traefik/modules/traefik/middleware.tf
@@ -0,0 +1,363 @@
+# Shared Traefik Middleware CRDs
+# These are referenced by ingress resources via annotations like:
+# "traefik.ingress.kubernetes.io/router.middlewares" = "traefik-rate-limit@kubernetescrd"
+
+# Rate limiting middleware (default tier for most ingresses).
+# Traefik rateLimit: `average` is the sustained allowed rate in requests
+# per second (over the default 1s period); `burst` caps short spikes.
+resource "kubernetes_manifest" "middleware_rate_limit" {
+  manifest = {
+    apiVersion = "traefik.io/v1alpha1"
+    kind       = "Middleware"
+    metadata = {
+      name      = "rate-limit"
+      namespace = kubernetes_namespace.traefik.metadata[0].name
+    }
+    spec = {
+      rateLimit = {
+        average = 10
+        burst   = 50
+      }
+    }
+  }
+
+  # Ordering: apply only after the Traefik release so the Middleware CRD
+  # exists (assumed to be installed by the chart — confirm).
+  depends_on = [helm_release.traefik]
+}
+
+# Authentik forward auth middleware.
+# Traefik forwards each incoming request to the authentik outpost endpoint
+# (reached through the local auth-proxy Service); on a 2xx response the
+# headers listed below are copied onto the upstream request so backends can
+# identify the authenticated user.
+resource "kubernetes_manifest" "middleware_authentik_forward_auth" {
+  manifest = {
+    apiVersion = "traefik.io/v1alpha1"
+    kind       = "Middleware"
+    metadata = {
+      name      = "authentik-forward-auth"
+      namespace = kubernetes_namespace.traefik.metadata[0].name
+    }
+    spec = {
+      forwardAuth = {
+        address = "http://auth-proxy.traefik.svc.cluster.local:9000/outpost.goauthentik.io/auth/traefik"
+        # Pass the incoming X-Forwarded-* headers through to authentik.
+        trustForwardHeader = true
+        authResponseHeaders = [
+          "X-authentik-username",
+          "X-authentik-uid",
+          "X-authentik-email",
+          "X-authentik-name",
+          "X-authentik-groups",
+          # Lets the outpost set/refresh its session cookie on the client.
+          "Set-Cookie",
+        ]
+      }
+    }
+  }
+
+  depends_on = [helm_release.traefik]
+}
+
+# IP allowlist for local-only access.
+# Requests originating outside these ranges are rejected by Traefik.
+resource "kubernetes_manifest" "middleware_local_only" {
+  manifest = {
+    apiVersion = "traefik.io/v1alpha1"
+    kind       = "Middleware"
+    metadata = {
+      name      = "local-only"
+      namespace = kubernetes_namespace.traefik.metadata[0].name
+    }
+    spec = {
+      ipAllowList = {
+        sourceRange = [
+          "192.168.1.0/24", # presumably the local LAN — confirm
+          "10.0.0.0/8",     # RFC1918; covers the node/pod CIDRs used elsewhere
+          "fc00::/7",       # IPv6 unique-local addresses
+          "fe80::/10",      # IPv6 link-local
+        ]
+        # NOTE(review): 172.16.0.0/12 and other 192.168.0.0/16 subnets are
+        # not allowed — confirm that exclusion is intentional.
+      }
+    }
+  }
+
+  depends_on = [helm_release.traefik]
+}
+
+# Redirect plain HTTP to HTTPS with a permanent (301) redirect.
+resource "kubernetes_manifest" "middleware_redirect_https" {
+  # The Middleware CRD ships with the Traefik release; apply it first.
+  depends_on = [helm_release.traefik]
+
+  manifest = {
+    apiVersion = "traefik.io/v1alpha1"
+    kind       = "Middleware"
+
+    metadata = {
+      namespace = kubernetes_namespace.traefik.metadata[0].name
+      name      = "redirect-https"
+    }
+
+    spec = {
+      redirectScheme = {
+        permanent = true
+        scheme    = "https"
+      }
+    }
+  }
+}
+
+# CSP headers middleware (default).
+# Sets only the `frame-ancestors` directive: pages may be embedded in
+# frames only by the serving site itself, viktorbarzin.me, and its
+# subdomains. No other CSP directives are applied here.
+resource "kubernetes_manifest" "middleware_csp_headers" {
+  manifest = {
+    apiVersion = "traefik.io/v1alpha1"
+    kind       = "Middleware"
+    metadata = {
+      name      = "csp-headers"
+      namespace = kubernetes_namespace.traefik.metadata[0].name
+    }
+    spec = {
+      headers = {
+        contentSecurityPolicy = "frame-ancestors 'self' *.viktorbarzin.me viktorbarzin.me"
+      }
+    }
+  }
+
+  depends_on = [helm_release.traefik]
+}
+
+# Security headers middleware (HSTS, X-Frame-Options, etc.).
+resource "kubernetes_manifest" "middleware_security_headers" {
+  manifest = {
+    apiVersion = "traefik.io/v1alpha1"
+    kind       = "Middleware"
+    metadata = {
+      name      = "security-headers"
+      namespace = kubernetes_namespace.traefik.metadata[0].name
+    }
+    spec = {
+      headers = {
+        # HSTS for one year, including subdomains.
+        stsSeconds           = 31536000
+        stsIncludeSubdomains = true
+        # X-Frame-Options: DENY — stricter than csp-headers' frame-ancestors;
+        # NOTE(review): when both middlewares are chained, CSP-aware browsers
+        # let frame-ancestors take precedence — confirm intended interplay.
+        frameDeny          = true
+        contentTypeNosniff = true
+        # X-XSS-Protection — legacy header, ignored by modern browsers.
+        browserXssFilter = true
+        referrerPolicy   = "strict-origin-when-cross-origin"
+        # Deny powerful browser features to all origins by default.
+        permissionsPolicy = "camera=(), microphone=(), geolocation=()"
+      }
+    }
+  }
+
+  depends_on = [helm_release.traefik]
+}
+
+# CrowdSec bouncer plugin middleware.
+# Consults the CrowdSec Local API (LAPI) and rejects requests from IPs with
+# active ban decisions. Option semantics below follow the
+# crowdsec-bouncer-traefik-plugin — verify against the plugin README when
+# upgrading the plugin version.
+resource "kubernetes_manifest" "middleware_crowdsec" {
+  manifest = {
+    apiVersion = "traefik.io/v1alpha1"
+    kind       = "Middleware"
+    metadata = {
+      name      = "crowdsec"
+      namespace = kubernetes_namespace.traefik.metadata[0].name
+    }
+    spec = {
+      plugin = {
+        crowdsec-bouncer = {
+          crowdsecLapiKey  = var.crowdsec_api_key
+          crowdsecLapiHost = "crowdsec-service.crowdsec.svc.cluster.local:8080"
+          # stream mode: decisions are pulled periodically instead of a
+          # per-request LAPI round trip.
+          crowdsecMode = "stream"
+          updateMaxFailure = -1 # fail-open: serve from cache when LAPI is unreachable
+          # Cache decisions in Redis so replicas share state.
+          redisCacheEnabled = true
+          redisCacheHost    = var.redis_host
+          redisCacheUnreachableBlock = false # don't block traffic if Redis is also unreachable
+          # NOTE(review): CIDRs are hard-coded here but the same 10/8 space is
+          # allow-listed in local-only — consider a shared variable.
+          clientTrustedIPs = ["10.0.20.0/24", "10.10.0.0/16"] # node + pod CIDRs bypass CrowdSec
+        }
+      }
+    }
+  }
+
+  depends_on = [helm_release.traefik]
+}
+
+# TLS option for mTLS (client certificate auth).
+# RequireAndVerifyClientCert: clients must present a certificate signed by a
+# CA from the "ca-secret" Secret. NOTE(review): that Secret is not managed
+# in this module and must exist in the traefik namespace — confirm it is
+# created elsewhere.
+resource "kubernetes_manifest" "tls_option_mtls" {
+  manifest = {
+    apiVersion = "traefik.io/v1alpha1"
+    kind       = "TLSOption"
+    metadata = {
+      name      = "mtls"
+      namespace = kubernetes_namespace.traefik.metadata[0].name
+    }
+    spec = {
+      clientAuth = {
+        secretNames    = ["ca-secret"]
+        clientAuthType = "RequireAndVerifyClientCert"
+      }
+    }
+  }
+
+  depends_on = [helm_release.traefik]
+}
+
+# ServersTransport for backends with self-signed certificates.
+# insecureSkipVerify disables TLS certificate verification towards the
+# backend — acceptable only for trusted, cluster-internal upstreams.
+resource "kubernetes_manifest" "servers_transport_insecure" {
+  manifest = {
+    apiVersion = "traefik.io/v1alpha1"
+    kind       = "ServersTransport"
+    metadata = {
+      name      = "insecure-skip-verify"
+      namespace = kubernetes_namespace.traefik.metadata[0].name
+    }
+    spec = {
+      insecureSkipVerify = true
+    }
+  }
+
+  depends_on = [helm_release.traefik]
+}
+
+# Strip Authentik identity headers before forwarding to the backend.
+# Useful for backends (iDRAC, TP-Link) that break when receiving extra
+# headers. Note: only the X-authentik-* request headers are removed —
+# despite what the middleware's name might suggest, cookies are untouched.
+resource "kubernetes_manifest" "middleware_strip_auth_headers" {
+  manifest = {
+    apiVersion = "traefik.io/v1alpha1"
+    kind       = "Middleware"
+    metadata = {
+      name      = "strip-auth-headers"
+      namespace = kubernetes_namespace.traefik.metadata[0].name
+    }
+    spec = {
+      headers = {
+        # An empty value in customRequestHeaders makes Traefik delete the
+        # header entirely.
+        customRequestHeaders = {
+          "X-authentik-username" = ""
+          "X-authentik-uid"      = ""
+          "X-authentik-email"    = ""
+          "X-authentik-name"     = ""
+          "X-authentik-groups"   = ""
+        }
+      }
+    }
+  }
+
+  depends_on = [helm_release.traefik]
+}
+
+# Immich-specific rate limit: much roomier than the default `rate-limit`
+# middleware (10 avg / 50 burst) so bulk photo uploads are not throttled.
+resource "kubernetes_manifest" "middleware_immich_rate_limit" {
+  # The Middleware CRD ships with the Traefik release; apply it first.
+  depends_on = [helm_release.traefik]
+
+  manifest = {
+    apiVersion = "traefik.io/v1alpha1"
+    kind       = "Middleware"
+
+    metadata = {
+      namespace = kubernetes_namespace.traefik.metadata[0].name
+      name      = "immich-rate-limit"
+    }
+
+    spec = {
+      rateLimit = {
+        burst   = 1000
+        average = 100
+      }
+    }
+  }
+}
+
+# Strip Accept-Encoding header so backends send uncompressed responses.
+# Used alongside the rewrite-body plugin (rybbit analytics) which fails to
+# decompress certain gzip responses (flate: corrupt input before offset 5).
+# Also used by the anti-AI trap-links rewrite-body middleware.
+resource "kubernetes_manifest" "middleware_strip_accept_encoding" {
+  manifest = {
+    apiVersion = "traefik.io/v1alpha1"
+    kind       = "Middleware"
+    metadata = {
+      name      = "strip-accept-encoding"
+      namespace = kubernetes_namespace.traefik.metadata[0].name
+    }
+    spec = {
+      headers = {
+        # An empty value deletes the header; without Accept-Encoding the
+        # backend must respond with identity (uncompressed) encoding.
+        customRequestHeaders = {
+          "Accept-Encoding" = ""
+        }
+      }
+    }
+  }
+
+  depends_on = [helm_release.traefik]
+}
+
+# ForwardAuth middleware to block known AI bot User-Agents.
+# Every request is checked against bot-block-proxy's /auth endpoint; a
+# non-2xx response makes Traefik reject the request.
+# NOTE(review): the bot-block-proxy Service is defined elsewhere — confirm
+# it exists in the traefik namespace and listens on 8080.
+resource "kubernetes_manifest" "middleware_ai_bot_block" {
+  manifest = {
+    apiVersion = "traefik.io/v1alpha1"
+    kind       = "Middleware"
+    metadata = {
+      name      = "ai-bot-block"
+      namespace = kubernetes_namespace.traefik.metadata[0].name
+    }
+    spec = {
+      forwardAuth = {
+        address = "http://bot-block-proxy.traefik.svc.cluster.local:8080/auth"
+        # Pass X-Forwarded-* through so the proxy sees the real client info.
+        trustForwardHeader = true
+      }
+    }
+  }
+
+  depends_on = [helm_release.traefik]
+}
+
+# X-Robots-Tag response header to discourage compliant AI crawlers.
+# "noai" / "noimageai" are non-standard, purely advisory directives — only
+# crawlers that voluntarily honor them are affected; this is not an
+# enforcement mechanism (see ai-bot-block for active blocking).
+resource "kubernetes_manifest" "middleware_anti_ai_headers" {
+  manifest = {
+    apiVersion = "traefik.io/v1alpha1"
+    kind       = "Middleware"
+    metadata = {
+      name      = "anti-ai-headers"
+      namespace = kubernetes_namespace.traefik.metadata[0].name
+    }
+    spec = {
+      headers = {
+        customResponseHeaders = {
+          "X-Robots-Tag" = "noai, noimageai"
+        }
+      }
+    }
+  }
+
+  depends_on = [helm_release.traefik]
+}
+
+# Inject hidden trap links before