diff --git a/.woodpecker/default.yml b/.woodpecker/default.yml
index 3e083ac5..5b96f74f 100644
--- a/.woodpecker/default.yml
+++ b/.woodpecker/default.yml
@@ -45,18 +45,15 @@ steps:
       - "chmod 755 /usr/local/bin/terragrunt"
       # Source Vault token
       - "source .vault-env"
-      # Apply extracted stacks in parallel
-      - "cd stacks/dbaas && terragrunt apply --non-interactive -auto-approve &"
-      - "cd stacks/authentik && terragrunt apply --non-interactive -auto-approve &"
-      - "cd stacks/crowdsec && terragrunt apply --non-interactive -auto-approve &"
-      - "cd stacks/monitoring && terragrunt apply --non-interactive -auto-approve &"
-      - "cd stacks/nvidia && terragrunt apply --non-interactive -auto-approve &"
-      - "cd stacks/mailserver && terragrunt apply --non-interactive -auto-approve &"
-      - "cd stacks/cloudflared && terragrunt apply --non-interactive -auto-approve &"
-      - "cd stacks/kyverno && terragrunt apply --non-interactive -auto-approve &"
-      # Apply platform stack (remaining core infrastructure services)
-      - "cd stacks/platform && terragrunt apply --non-interactive -auto-approve"
-      - "wait"
+      # Apply all platform stacks in parallel; track PIDs — a bare `wait` always exits 0
+      - |
+        for stack in dbaas authentik crowdsec monitoring nvidia mailserver cloudflared kyverno \
+            metallb redis traefik technitium headscale rbac k8s-portal vaultwarden \
+            reverse-proxy metrics-server vpa nfs-csi iscsi-csi cnpg sealed-secrets \
+            uptime-kuma wireguard xray infra-maintenance platform; do
+          (cd "stacks/$stack" && terragrunt apply --non-interactive -auto-approve) & pids="$pids $!"
+        done
+        rc=0; for pid in $pids; do wait "$pid" || rc=1; done; exit $rc
 
   - name: cleanup-and-push
     image: alpine
diff --git a/stacks/cnpg/main.tf b/stacks/cnpg/main.tf
new file mode 100644
index 00000000..cdb77325
--- /dev/null
+++ b/stacks/cnpg/main.tf
@@ -0,0 +1,4 @@
+module "cnpg" {
+  source = "./modules/cnpg"
+  tier   = local.tiers.cluster
+}
diff --git a/stacks/cnpg/modules/cnpg/main.tf b/stacks/cnpg/modules/cnpg/main.tf
new file mode 100644
index 00000000..64a1d730
--- /dev/null
+++ b/stacks/cnpg/modules/cnpg/main.tf
@@ -0,0 +1,53 @@
+variable "tier" { type = string } + +# ----------------------------------------------------------------------------- +# Namespace +# ----------------------------------------------------------------------------- +resource "kubernetes_namespace" "cnpg_system" { + metadata { + name = "cnpg-system" + labels = { + tier = var.tier + } + } +} + +# ----------------------------------------------------------------------------- +# CloudNativePG Operator — manages PostgreSQL clusters via CRDs +# https://cloudnative-pg.io/ +# ----------------------------------------------------------------------------- +resource "helm_release" "cnpg" { + namespace = kubernetes_namespace.cnpg_system.metadata[0].name + create_namespace = false + name = "cnpg" + atomic = true + timeout = 300 + + repository = "https://cloudnative-pg.github.io/charts" + chart = "cloudnative-pg" + version = "0.27.1" + + values = [yamlencode({ + crds = { + create = true + } + + replicaCount = 1 + + resources = { + requests = { + cpu = "100m" + memory = "256Mi" + } + limits = { + memory = "256Mi" + } + } + })] +} + +# NOTE: local-path-provisioner is already installed in the cluster +# (via cloud-init template) with StorageClass "local-path" (default). +# ReclaimPolicy is "Delete" — for CNPG clusters, set +# .spec.storage.pvcTemplate.storageClassName = "local-path" in the +# Cluster CR. CNPG handles PVC lifecycle independently. 
diff --git a/stacks/cnpg/secrets b/stacks/cnpg/secrets new file mode 120000 index 00000000..ca54a7cf --- /dev/null +++ b/stacks/cnpg/secrets @@ -0,0 +1 @@ +../../secrets \ No newline at end of file diff --git a/stacks/cnpg/terragrunt.hcl b/stacks/cnpg/terragrunt.hcl new file mode 100644 index 00000000..4f16dddf --- /dev/null +++ b/stacks/cnpg/terragrunt.hcl @@ -0,0 +1,8 @@ +include "root" { + path = find_in_parent_folders() +} + +dependency "infra" { + config_path = "../infra" + skip_outputs = true +} diff --git a/stacks/cnpg/tiers.tf b/stacks/cnpg/tiers.tf new file mode 100644 index 00000000..eb0f8083 --- /dev/null +++ b/stacks/cnpg/tiers.tf @@ -0,0 +1,10 @@ +# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa +locals { + tiers = { + core = "0-core" + cluster = "1-cluster" + gpu = "2-gpu" + edge = "3-edge" + aux = "4-aux" + } +} diff --git a/stacks/headscale/main.tf b/stacks/headscale/main.tf new file mode 100644 index 00000000..a0c731f3 --- /dev/null +++ b/stacks/headscale/main.tf @@ -0,0 +1,21 @@ +variable "tls_secret_name" { type = string } +variable "nfs_server" { type = string } + +data "vault_kv_secret_v2" "secrets" { + mount = "secret" + name = "platform" +} + +locals { + homepage_credentials = jsondecode(data.vault_kv_secret_v2.secrets.data["homepage_credentials"]) +} + +module "headscale" { + source = "./modules/headscale" + tls_secret_name = var.tls_secret_name + nfs_server = var.nfs_server + headscale_config = data.vault_kv_secret_v2.secrets.data["headscale_config"] + headscale_acl = data.vault_kv_secret_v2.secrets.data["headscale_acl"] + homepage_token = try(local.homepage_credentials["headscale"]["api_key"], "") + tier = local.tiers.core +} diff --git a/stacks/headscale/modules/headscale/main.tf b/stacks/headscale/modules/headscale/main.tf new file mode 100644 index 00000000..4d5e9dc6 --- /dev/null +++ b/stacks/headscale/modules/headscale/main.tf @@ -0,0 +1,324 @@ + +variable "tls_secret_name" {} +variable "tier" { type = string } +variable 
"headscale_config" {} +variable "headscale_acl" {} +variable "nfs_server" { type = string } +variable "homepage_token" { + type = string + default = "" + sensitive = true +} + +resource "kubernetes_namespace" "headscale" { + metadata { + name = "headscale" + labels = { + tier = var.tier + } + } +} + +module "tls_secret" { + source = "../../../../modules/kubernetes/setup_tls_secret" + namespace = kubernetes_namespace.headscale.metadata[0].name + tls_secret_name = var.tls_secret_name +} + +module "nfs_data" { + source = "../../../../modules/kubernetes/nfs_volume" + name = "headscale-data" + namespace = kubernetes_namespace.headscale.metadata[0].name + nfs_server = var.nfs_server + nfs_path = "/mnt/main/headscale" +} + +resource "kubernetes_deployment" "headscale" { + metadata { + name = "headscale" + namespace = kubernetes_namespace.headscale.metadata[0].name + labels = { + app = "headscale" + tier = var.tier + # scare to try but probably non-http will fail + # "istio-injection" : "enabled" + } + + annotations = { + "reloader.stakater.com/search" = "true" + } + } + spec { + replicas = 1 + strategy { + type = "Recreate" + } + selector { + match_labels = { + app = "headscale" + } + } + template { + metadata { + labels = { + app = "headscale" + } + annotations = { + # "diun.enable" = "true" + "diun.enable" = "false" + "diun.include_tags" = "^\\d+(?:\\.\\d+)?(?:\\.\\d+)?$" + } + } + spec { + container { + image = "headscale/headscale:0.23.0" + # image = "headscale/headscale:0.23.0-debug" # -debug is for debug images + name = "headscale" + command = ["headscale", "serve"] + + resources { + requests = { + cpu = "50m" + memory = "128Mi" + } + limits = { + memory = "128Mi" + } + } + + port { + container_port = 8080 + } + port { + container_port = 9090 + } + port { + container_port = 41641 + } + + liveness_probe { + http_get { + path = "/health" + port = 8080 + } + initial_delay_seconds = 15 + period_seconds = 30 + timeout_seconds = 5 + failure_threshold = 5 + } + 
readiness_probe { + http_get { + path = "/health" + port = 8080 + } + initial_delay_seconds = 5 + period_seconds = 30 + timeout_seconds = 5 + failure_threshold = 3 + } + + volume_mount { + name = "config-volume" + mount_path = "/etc/headscale" + } + + volume_mount { + mount_path = "/mnt" + name = "nfs-config" + } + } + volume { + name = "config-volume" + config_map { + name = "headscale-config" + items { + key = "config.yaml" + path = "config.yaml" + } + items { + key = "acl.yaml" + path = "acl.yaml" + } + } + } + + volume { + name = "nfs-config" + persistent_volume_claim { + claim_name = module.nfs_data.claim_name + } + } + # container { + # image = "simcu/headscale-ui:0.1.4" + # name = "headscale-ui" + # port { + # container_port = 80 + # } + # } + container { + image = "ghcr.io/gurucomputing/headscale-ui:latest" + # image = "ghcr.io/tale/headplane:0.3.2" + name = "headscale-ui" + + resources { + requests = { + cpu = "25m" + memory = "128Mi" + } + limits = { + memory = "128Mi" + } + } + + port { + container_port = 8081 + # container_port = 3000 + } + env { + name = "HTTP_PORT" + value = "8081" + } + # env { + # name = "HTTPS_PORT" + # value = "8082" + # } + env { + name = "HEADSCALE_URL" + value = "http://localhost:8080" + } + env { + name = "COOKIE_SECRET" + value = "kekekekke" + } + env { + name = "ROOT_API_KEY" + value = "kekekekeke" + } + } + dns_config { + option { + name = "ndots" + value = "2" + } + } + } + } + } +} +resource "kubernetes_service" "headscale" { + metadata { + name = "headscale" + namespace = kubernetes_namespace.headscale.metadata[0].name + labels = { + "app" = "headscale" + } + annotations = { + "prometheus.io/scrape" = "true" + "prometheus.io/port" = "9090" + } + # annotations = { + # "metallb.universe.tf/allow-shared-ip" : "shared" + # } + } + + spec { + # type = "LoadBalancer" + # external_traffic_policy = "Cluster" + selector = { + app = "headscale" + + } + port { + name = "headscale" + port = "8080" + protocol = "TCP" + } + port { + 
name = "headscale-ui" + port = "80" + target_port = 8081 + # target_port = 3000 + protocol = "TCP" + } + port { + name = "metrics" + port = "9090" + protocol = "TCP" + } + } +} + +module "ingress" { + source = "../../../../modules/kubernetes/ingress_factory" + namespace = kubernetes_namespace.headscale.metadata[0].name + name = "headscale" + port = 8080 + tls_secret_name = var.tls_secret_name + extra_annotations = { + "gethomepage.dev/enabled" = "true" + "gethomepage.dev/name" = "Headscale" + "gethomepage.dev/description" = "VPN mesh network" + "gethomepage.dev/icon" = "headscale.png" + "gethomepage.dev/group" = "Identity & Security" + "gethomepage.dev/pod-selector" = "" + } +} + +module "ingress-ui" { + source = "../../../../modules/kubernetes/ingress_factory" + namespace = kubernetes_namespace.headscale.metadata[0].name + name = "headscale-ui" + host = "headscale" + service_name = "headscale" + port = 8081 + ingress_path = ["/web"] + tls_secret_name = var.tls_secret_name +} + +resource "kubernetes_service" "headscale-server" { + metadata { + name = "headscale-server" + namespace = kubernetes_namespace.headscale.metadata[0].name + labels = { + "app" = "headscale" + } + annotations = { + "metallb.universe.tf/allow-shared-ip" : "shared" + } + } + + spec { + type = "LoadBalancer" + external_traffic_policy = "Cluster" + selector = { + app = "headscale" + + } + # port { + # name = "headscale-tcp" + # port = "41641" + # protocol = "TCP" + # } + port { + name = "headscale-udp" + port = "41641" + protocol = "UDP" + } + } +} + +resource "kubernetes_config_map" "headscale-config" { + metadata { + name = "headscale-config" + namespace = kubernetes_namespace.headscale.metadata[0].name + + annotations = { + "reloader.stakater.com/match" = "true" + } + } + + data = { + "config.yaml" = var.headscale_config + "acl.yaml" = var.headscale_acl + } +} diff --git a/stacks/headscale/secrets b/stacks/headscale/secrets new file mode 120000 index 00000000..ca54a7cf --- /dev/null +++ 
b/stacks/headscale/secrets @@ -0,0 +1 @@ +../../secrets \ No newline at end of file diff --git a/stacks/headscale/terragrunt.hcl b/stacks/headscale/terragrunt.hcl new file mode 100644 index 00000000..4f16dddf --- /dev/null +++ b/stacks/headscale/terragrunt.hcl @@ -0,0 +1,8 @@ +include "root" { + path = find_in_parent_folders() +} + +dependency "infra" { + config_path = "../infra" + skip_outputs = true +} diff --git a/stacks/headscale/tiers.tf b/stacks/headscale/tiers.tf new file mode 100644 index 00000000..eb0f8083 --- /dev/null +++ b/stacks/headscale/tiers.tf @@ -0,0 +1,10 @@ +# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa +locals { + tiers = { + core = "0-core" + cluster = "1-cluster" + gpu = "2-gpu" + edge = "3-edge" + aux = "4-aux" + } +} diff --git a/stacks/infra-maintenance/main.tf b/stacks/infra-maintenance/main.tf new file mode 100644 index 00000000..7faba6ea --- /dev/null +++ b/stacks/infra-maintenance/main.tf @@ -0,0 +1,15 @@ +variable "nfs_server" { type = string } + +data "vault_kv_secret_v2" "secrets" { + mount = "secret" + name = "platform" +} + +module "infra-maintenance" { + source = "./modules/infra-maintenance" + nfs_server = var.nfs_server + git_user = data.vault_kv_secret_v2.secrets.data["webhook_handler_git_user"] + git_token = data.vault_kv_secret_v2.secrets.data["webhook_handler_git_token"] + technitium_username = data.vault_kv_secret_v2.secrets.data["technitium_username"] + technitium_password = data.vault_kv_secret_v2.secrets.data["technitium_password"] +} diff --git a/stacks/infra-maintenance/modules/infra-maintenance/main.tf b/stacks/infra-maintenance/modules/infra-maintenance/main.tf new file mode 100644 index 00000000..abf50453 --- /dev/null +++ b/stacks/infra-maintenance/modules/infra-maintenance/main.tf @@ -0,0 +1,274 @@ +# Module to run some infra-specific things like updating the public ip +variable "git_user" {} +variable "git_token" {} +variable "technitium_username" {} +variable "technitium_password" {} +variable "nfs_server" { 
type = string } + + +# DISABLED WHILST USING CLOUDFLARE NS +# resource "kubernetes_cron_job_v1" "update-public-ip" { +# metadata { +# name = "update-public-ip" +# namespace = "default" +# } +# spec { +# schedule = "*/5 * * * *" +# successful_jobs_history_limit = 1 +# failed_jobs_history_limit = 1 +# concurrency_policy = "Forbid" +# job_template { +# metadata { +# name = "update-public-ip" +# } +# spec { +# template { +# metadata { +# name = "update-public-ip" +# } +# spec { +# priority_class_name = "system-cluster-critical" +# container { +# name = "update-public-ip" +# image = "viktorbarzin/infra" +# command = ["./infra_cli"] +# args = ["-use-case", "update-public-ip"] + +# env { +# name = "GIT_USER" +# value = var.git_user +# } +# env { +# name = "GIT_TOKEN" +# value = var.git_token +# } +# env { +# name = "TECHNITIUM_USERNAME" +# value = var.technitium_username +# } +# env { +# name = "TECHNITIUM_PASSWORD" +# value = var.technitium_password +# } +# } +# restart_policy = "Never" +# # service_account_name = "descheduler-sa" +# # volume { +# # name = "policy-volume" +# # config_map { +# # name = "policy-configmap" +# # } +# # } +# } +# } +# } +# } +# } +# } + +module "nfs_etcd_backup" { + source = "../../../../modules/kubernetes/nfs_volume" + name = "infra-etcd-backup" + namespace = "default" + nfs_server = var.nfs_server + nfs_path = "/mnt/main/etcd-backup" +} + +# # backup etcd +resource "kubernetes_cron_job_v1" "backup-etcd" { + metadata { + name = "backup-etcd" + namespace = "default" + } + spec { + schedule = "0 0 * * *" + successful_jobs_history_limit = 1 + failed_jobs_history_limit = 1 + concurrency_policy = "Forbid" + job_template { + metadata { + name = "backup-etcd" + } + spec { + template { + metadata { + name = "backup-etcd" + } + spec { + node_name = "k8s-master" + priority_class_name = "system-cluster-critical" + host_network = true + container { + name = "backup-etcd" + image = "registry.k8s.io/etcd:3.5.21-0" + command = ["etcdctl"] + args = 
["--endpoints=https://127.0.0.1:2379", "--cacert=/etc/kubernetes/pki/etcd/ca.crt", "--cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt", "--key=/etc/kubernetes/pki/etcd/healthcheck-client.key", "snapshot", "save", "/backup/etcd-snapshot-latest.db"] + env { + name = "ETCDCTL_API" + value = "3" + } + volume_mount { + mount_path = "/backup" + name = "backup" + } + volume_mount { + mount_path = "/etc/kubernetes/pki/etcd" + name = "etcd-certs" + read_only = true + } + } + container { + name = "backup-purge" + image = "busybox:1.31.1" + command = ["/bin/sh"] + args = ["-c", "find /backup -type f -mtime +30 -name '*.db' -exec rm -- '{}' \\;"] + + volume_mount { + mount_path = "/backup" + name = "backup" + } + } + + volume { + name = "backup" + persistent_volume_claim { + claim_name = module.nfs_etcd_backup.claim_name + } + } + volume { + name = "etcd-certs" + host_path { + path = "/etc/kubernetes/pki/etcd" + type = "DirectoryOrCreate" + } + } + restart_policy = "Never" + } + } + } + } + } +} + +# Weekly etcd defragmentation — prevents fragmentation buildup that causes slow requests +resource "kubernetes_cron_job_v1" "defrag-etcd" { + metadata { + name = "defrag-etcd" + namespace = "default" + } + spec { + schedule = "0 3 * * 0" + successful_jobs_history_limit = 1 + failed_jobs_history_limit = 1 + concurrency_policy = "Forbid" + job_template { + metadata { + name = "defrag-etcd" + } + spec { + template { + metadata { + name = "defrag-etcd" + } + spec { + node_name = "k8s-master" + priority_class_name = "system-cluster-critical" + host_network = true + container { + name = "defrag-etcd" + image = "registry.k8s.io/etcd:3.5.21-0" + command = ["etcdctl"] + args = ["--endpoints=https://127.0.0.1:2379", "--cacert=/etc/kubernetes/pki/etcd/ca.crt", "--cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt", "--key=/etc/kubernetes/pki/etcd/healthcheck-client.key", "--command-timeout=60s", "defrag"] + env { + name = "ETCDCTL_API" + value = "3" + } + volume_mount { + mount_path = 
"/etc/kubernetes/pki/etcd" + name = "etcd-certs" + read_only = true + } + } + volume { + name = "etcd-certs" + host_path { + path = "/etc/kubernetes/pki/etcd" + type = "DirectoryOrCreate" + } + } + restart_policy = "Never" + } + } + } + } + } +} + +# Clean up evicted/failed pods cluster-wide daily +resource "kubernetes_cron_job_v1" "cleanup-failed-pods" { + metadata { + name = "cleanup-failed-pods" + namespace = "default" + } + spec { + schedule = "0 2 * * *" + successful_jobs_history_limit = 1 + failed_jobs_history_limit = 1 + concurrency_policy = "Forbid" + job_template { + metadata { + name = "cleanup-failed-pods" + } + spec { + template { + metadata { + name = "cleanup-failed-pods" + } + spec { + service_account_name = kubernetes_service_account.cleanup_sa.metadata[0].name + container { + name = "cleanup" + image = "bitnami/kubectl:latest" + command = ["/bin/sh", "-c", "kubectl delete pods -A --field-selector=status.phase=Failed --ignore-not-found"] + } + restart_policy = "Never" + } + } + } + } + } +} + +resource "kubernetes_service_account" "cleanup_sa" { + metadata { + name = "failed-pod-cleanup" + namespace = "default" + } +} + +resource "kubernetes_cluster_role" "cleanup_role" { + metadata { + name = "failed-pod-cleanup" + } + rule { + api_groups = [""] + resources = ["pods"] + verbs = ["list", "delete"] + } +} + +resource "kubernetes_cluster_role_binding" "cleanup_binding" { + metadata { + name = "failed-pod-cleanup" + } + role_ref { + api_group = "rbac.authorization.k8s.io" + kind = "ClusterRole" + name = kubernetes_cluster_role.cleanup_role.metadata[0].name + } + subject { + kind = "ServiceAccount" + name = kubernetes_service_account.cleanup_sa.metadata[0].name + namespace = "default" + } +} diff --git a/stacks/infra-maintenance/secrets b/stacks/infra-maintenance/secrets new file mode 120000 index 00000000..ca54a7cf --- /dev/null +++ b/stacks/infra-maintenance/secrets @@ -0,0 +1 @@ +../../secrets \ No newline at end of file diff --git 
a/stacks/infra-maintenance/terragrunt.hcl b/stacks/infra-maintenance/terragrunt.hcl new file mode 100644 index 00000000..4f16dddf --- /dev/null +++ b/stacks/infra-maintenance/terragrunt.hcl @@ -0,0 +1,8 @@ +include "root" { + path = find_in_parent_folders() +} + +dependency "infra" { + config_path = "../infra" + skip_outputs = true +} diff --git a/stacks/infra-maintenance/tiers.tf b/stacks/infra-maintenance/tiers.tf new file mode 100644 index 00000000..eb0f8083 --- /dev/null +++ b/stacks/infra-maintenance/tiers.tf @@ -0,0 +1,10 @@ +# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa +locals { + tiers = { + core = "0-core" + cluster = "1-cluster" + gpu = "2-gpu" + edge = "3-edge" + aux = "4-aux" + } +} diff --git a/stacks/iscsi-csi/main.tf b/stacks/iscsi-csi/main.tf new file mode 100644 index 00000000..0cbc798d --- /dev/null +++ b/stacks/iscsi-csi/main.tf @@ -0,0 +1,14 @@ +variable "nfs_server" { type = string } + +data "vault_kv_secret_v2" "secrets" { + mount = "secret" + name = "platform" +} + +module "iscsi-csi" { + source = "./modules/iscsi-csi" + tier = local.tiers.cluster + truenas_host = var.nfs_server + truenas_api_key = data.vault_kv_secret_v2.secrets.data["truenas_api_key"] + truenas_ssh_private_key = data.vault_kv_secret_v2.secrets.data["truenas_ssh_private_key"] +} diff --git a/stacks/iscsi-csi/modules/iscsi-csi/main.tf b/stacks/iscsi-csi/modules/iscsi-csi/main.tf new file mode 100644 index 00000000..4c4a8d59 --- /dev/null +++ b/stacks/iscsi-csi/modules/iscsi-csi/main.tf @@ -0,0 +1,148 @@ +resource "kubernetes_namespace" "iscsi_csi" { + metadata { + name = "iscsi-csi" + labels = { + tier = var.tier + "resource-governance/custom-quota" = "true" + } + } +} + +resource "helm_release" "democratic_csi" { + namespace = kubernetes_namespace.iscsi_csi.metadata[0].name + create_namespace = false + name = "democratic-csi-iscsi" + atomic = true + timeout = 300 + + repository = "https://democratic-csi.github.io/charts/" + chart = "democratic-csi" + + values = 
[yamlencode({ + csiDriver = { + name = "org.democratic-csi.iscsi" + } + + storageClasses = [{ + name = "iscsi-truenas" + defaultClass = false + reclaimPolicy = "Retain" + volumeBindingMode = "Immediate" + allowVolumeExpansion = true + parameters = { + fsType = "ext4" + } + mountOptions = [] + }] + + controller = { + replicas = 2 + driver = { + resources = { + requests = { cpu = "25m", memory = "192Mi" } + limits = { memory = "192Mi" } + } + } + externalProvisioner = { + resources = { + requests = { cpu = "5m", memory = "64Mi" } + limits = { memory = "64Mi" } + } + } + externalAttacher = { + resources = { + requests = { cpu = "5m", memory = "64Mi" } + limits = { memory = "64Mi" } + } + } + externalResizer = { + resources = { + requests = { cpu = "5m", memory = "64Mi" } + limits = { memory = "64Mi" } + } + } + externalSnapshotter = { + resources = { + requests = { cpu = "5m", memory = "80Mi" } + limits = { memory = "80Mi" } + } + } + } + + # csiProxy is a top-level chart key, NOT nested under controller/node + csiProxy = { + resources = { + requests = { cpu = "5m", memory = "32Mi" } + limits = { memory = "32Mi" } + } + } + + node = { + driver = { + resources = { + requests = { cpu = "25m", memory = "192Mi" } + limits = { memory = "192Mi" } + } + } + driverRegistrar = { + resources = { + requests = { cpu = "5m", memory = "32Mi" } + limits = { memory = "32Mi" } + } + } + cleanup = { + resources = { + requests = { cpu = "5m", memory = "32Mi" } + limits = { memory = "32Mi" } + } + } + + hostPID = true + hostPath = "/lib/modules" + } + + driver = { + config = { + driver = "freenas-iscsi" + + instance_id = "truenas-iscsi" + + httpConnection = { + protocol = "http" + host = var.truenas_host + port = 80 + apiKey = var.truenas_api_key + } + + sshConnection = { + host = var.truenas_host + port = 22 + username = "root" + privateKey = var.truenas_ssh_private_key + } + + zfs = { + datasetParentName = "main/iscsi" + detachedSnapshotsDatasetParentName = "main/iscsi-snaps" + } + + 
iscsi = { + targetPortal = "${var.truenas_host}:3260" + namePrefix = "csi-" + nameSuffix = "" + targetGroups = [{ + targetGroupPortalGroup = 1 + targetGroupInitiatorGroup = 1 + targetGroupAuthType = "None" + }] + extentInsecureTpc = true + extentXenCompat = false + extentDisablePhysicalBlocksize = true + extentBlocksize = 512 + extentRpm = "SSD" + extentAvailThreshold = 0 + } + } + } + })] +} diff --git a/stacks/iscsi-csi/modules/iscsi-csi/variables.tf b/stacks/iscsi-csi/modules/iscsi-csi/variables.tf new file mode 100644 index 00000000..893fe396 --- /dev/null +++ b/stacks/iscsi-csi/modules/iscsi-csi/variables.tf @@ -0,0 +1,10 @@ +variable "tier" { type = string } +variable "truenas_host" { type = string } +variable "truenas_api_key" { + type = string + sensitive = true +} +variable "truenas_ssh_private_key" { + type = string + sensitive = true +} diff --git a/stacks/iscsi-csi/secrets b/stacks/iscsi-csi/secrets new file mode 120000 index 00000000..ca54a7cf --- /dev/null +++ b/stacks/iscsi-csi/secrets @@ -0,0 +1 @@ +../../secrets \ No newline at end of file diff --git a/stacks/iscsi-csi/terragrunt.hcl b/stacks/iscsi-csi/terragrunt.hcl new file mode 100644 index 00000000..4f16dddf --- /dev/null +++ b/stacks/iscsi-csi/terragrunt.hcl @@ -0,0 +1,8 @@ +include "root" { + path = find_in_parent_folders() +} + +dependency "infra" { + config_path = "../infra" + skip_outputs = true +} diff --git a/stacks/iscsi-csi/tiers.tf b/stacks/iscsi-csi/tiers.tf new file mode 100644 index 00000000..eb0f8083 --- /dev/null +++ b/stacks/iscsi-csi/tiers.tf @@ -0,0 +1,10 @@ +# Generated by Terragrunt. 
Sig: nIlQXj57tbuaRZEa +locals { + tiers = { + core = "0-core" + cluster = "1-cluster" + gpu = "2-gpu" + edge = "3-edge" + aux = "4-aux" + } +} diff --git a/stacks/k8s-portal/main.tf b/stacks/k8s-portal/main.tf new file mode 100644 index 00000000..48e2233e --- /dev/null +++ b/stacks/k8s-portal/main.tf @@ -0,0 +1,12 @@ +variable "tls_secret_name" { type = string } +variable "k8s_ca_cert" { + type = string + default = "" +} + +module "k8s-portal" { + source = "./modules/k8s-portal" + tier = local.tiers.edge + tls_secret_name = var.tls_secret_name + k8s_ca_cert = var.k8s_ca_cert +} diff --git a/stacks/k8s-portal/modules/k8s-portal/files/.claude/internet-mode-used_DO_NOT_REMOVE_MANUALLY_SECURITY_RISK b/stacks/k8s-portal/modules/k8s-portal/files/.claude/internet-mode-used_DO_NOT_REMOVE_MANUALLY_SECURITY_RISK new file mode 100644 index 00000000..f61efc83 --- /dev/null +++ b/stacks/k8s-portal/modules/k8s-portal/files/.claude/internet-mode-used_DO_NOT_REMOVE_MANUALLY_SECURITY_RISK @@ -0,0 +1,3 @@ +This directory has been used with Claude Code's internet mode. +Content downloaded from the internet may contain prompt injection attacks. +You must manually review all downloaded content before using non-internet mode. 
diff --git a/stacks/k8s-portal/modules/k8s-portal/files/.gitignore b/stacks/k8s-portal/modules/k8s-portal/files/.gitignore new file mode 100644 index 00000000..3b462cb0 --- /dev/null +++ b/stacks/k8s-portal/modules/k8s-portal/files/.gitignore @@ -0,0 +1,23 @@ +node_modules + +# Output +.output +.vercel +.netlify +.wrangler +/.svelte-kit +/build + +# OS +.DS_Store +Thumbs.db + +# Env +.env +.env.* +!.env.example +!.env.test + +# Vite +vite.config.js.timestamp-* +vite.config.ts.timestamp-* diff --git a/stacks/k8s-portal/modules/k8s-portal/files/.npmrc b/stacks/k8s-portal/modules/k8s-portal/files/.npmrc new file mode 100644 index 00000000..b6f27f13 --- /dev/null +++ b/stacks/k8s-portal/modules/k8s-portal/files/.npmrc @@ -0,0 +1 @@ +engine-strict=true diff --git a/stacks/k8s-portal/modules/k8s-portal/files/Dockerfile b/stacks/k8s-portal/modules/k8s-portal/files/Dockerfile new file mode 100644 index 00000000..aa694722 --- /dev/null +++ b/stacks/k8s-portal/modules/k8s-portal/files/Dockerfile @@ -0,0 +1,15 @@ +FROM node:22-alpine AS build +WORKDIR /app +COPY package*.json ./ +RUN npm ci +COPY . . +RUN npm run build + +FROM node:22-alpine +WORKDIR /app +COPY --from=build /app/build ./build +COPY --from=build /app/package.json ./ +COPY --from=build /app/node_modules ./node_modules +ENV PORT=3000 +EXPOSE 3000 +CMD ["node", "build"] diff --git a/stacks/k8s-portal/modules/k8s-portal/files/README.md b/stacks/k8s-portal/modules/k8s-portal/files/README.md new file mode 100644 index 00000000..eb635072 --- /dev/null +++ b/stacks/k8s-portal/modules/k8s-portal/files/README.md @@ -0,0 +1,42 @@ +# sv + +Everything you need to build a Svelte project, powered by [`sv`](https://github.com/sveltejs/cli). + +## Creating a project + +If you're seeing this, you've probably already done this step. Congrats! 
+ +```sh +# create a new project +npx sv create my-app +``` + +To recreate this project with the same configuration: + +```sh +# recreate this project +npx sv create --template minimal --types ts --install npm . +``` + +## Developing + +Once you've created a project and installed dependencies with `npm install` (or `pnpm install` or `yarn`), start a development server: + +```sh +npm run dev + +# or start the server and open the app in a new browser tab +npm run dev -- --open +``` + +## Building + +To create a production version of your app: + +```sh +npm run build +``` + +You can preview the production build with `npm run preview`. + +> To deploy your app, you may need to install an [adapter](https://svelte.dev/docs/kit/adapters) for your target environment. diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/app.d.ts b/stacks/k8s-portal/modules/k8s-portal/files/src/app.d.ts new file mode 100644 index 00000000..da08e6da --- /dev/null +++ b/stacks/k8s-portal/modules/k8s-portal/files/src/app.d.ts @@ -0,0 +1,13 @@ +// See https://svelte.dev/docs/kit/types#app.d.ts +// for information about these interfaces +declare global { + namespace App { + // interface Error {} + // interface Locals {} + // interface PageData {} + // interface PageState {} + // interface Platform {} + } +} + +export {}; diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/app.html b/stacks/k8s-portal/modules/k8s-portal/files/src/app.html new file mode 100644 index 00000000..f273cc58 --- /dev/null +++ b/stacks/k8s-portal/modules/k8s-portal/files/src/app.html @@ -0,0 +1,11 @@ + + + + + + %sveltekit.head% + + +
%sveltekit.body%
+ + diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/lib/assets/favicon.svg b/stacks/k8s-portal/modules/k8s-portal/files/src/lib/assets/favicon.svg new file mode 100644 index 00000000..cc5dc66a --- /dev/null +++ b/stacks/k8s-portal/modules/k8s-portal/files/src/lib/assets/favicon.svg @@ -0,0 +1 @@ +svelte-logo \ No newline at end of file diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/lib/index.ts b/stacks/k8s-portal/modules/k8s-portal/files/src/lib/index.ts new file mode 100644 index 00000000..856f2b6c --- /dev/null +++ b/stacks/k8s-portal/modules/k8s-portal/files/src/lib/index.ts @@ -0,0 +1 @@ +// place files you want to import through the `$lib` alias in this folder. diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/+layout.svelte b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/+layout.svelte new file mode 100644 index 00000000..d412c4d6 --- /dev/null +++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/+layout.svelte @@ -0,0 +1,64 @@ + + + + + + + + +{@render children()} + + diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/+page.server.ts b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/+page.server.ts new file mode 100644 index 00000000..cc532664 --- /dev/null +++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/+page.server.ts @@ -0,0 +1,33 @@ +import type { PageServerLoad } from './$types'; +import { readFileSync } from 'fs'; + +interface UserRole { + role: string; + namespaces: string[]; +} + +export const load: PageServerLoad = async ({ request }) => { + const email = request.headers.get('x-authentik-email') || 'unknown'; + const username = request.headers.get('x-authentik-username') || 'unknown'; + const groups = request.headers.get('x-authentik-groups') || ''; + + // Read user roles from ConfigMap-mounted file + let userRole: UserRole = { role: 'unknown', namespaces: [] }; + try { + const usersJson = readFileSync('/config/users.json', 'utf-8'); + const users = 
JSON.parse(usersJson); + if (users[email]) { + userRole = users[email]; + } + } catch { + // ConfigMap not mounted or parse error + } + + return { + email, + username, + groups: groups.split('|').filter(Boolean), + role: userRole.role, + namespaces: userRole.namespaces + }; +}; diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/+page.svelte b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/+page.svelte new file mode 100644 index 00000000..2d13fa39 --- /dev/null +++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/+page.svelte @@ -0,0 +1,102 @@ + + +
+

Kubernetes Access Portal

+ +
+ VPN Required — The cluster is on a private network. You need Headscale VPN access before kubectl will work. + See the Getting Started guide for VPN setup instructions. +
+ +
+

Your Identity

+

Username: {data.username}

+

Email: {data.email}

+

Role: {data.role}

+ {#if data.namespaces.length > 0} +

Namespaces: {data.namespaces.join(', ')}

+ {/if} +
+ + {#if data.role === 'namespace-owner'} +
+

Your Namespace

+

Assigned namespaces: {data.namespaces.join(', ')}

+ +

Quick Commands

+
+# Check your pods
+kubectl get pods -n {data.namespaces[0]}
+
+# View quota usage
+kubectl describe resourcequota -n {data.namespaces[0]}
+
+# Log into Vault
+vault login -method=oidc
+
+# Store a secret
+vault kv put secret/{data.username}/myapp KEY=value
+
+# Get K8s deploy token
+vault write kubernetes/creds/{data.namespaces[0]}-deployer \
+  kubernetes_namespace={data.namespaces[0]}
+
+ {/if} + +
+

Get Started

+
    + {#if data.role === 'namespace-owner'} +
  1. Complete the namespace-owner onboarding guide
  2. + {:else} +
  3. Complete the onboarding guide (VPN, kubectl, git)
  4. + {/if} +
  5. Install kubectl and kubelogin
  6. +
  7. Download your kubeconfig
  8. +
  9. Run kubectl get namespaces to verify access
  10. +
+
+ +
+

Resources

+ +
+
+ + diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/agent/+page.svelte b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/agent/+page.svelte new file mode 100644 index 00000000..bf7f3f3a --- /dev/null +++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/agent/+page.svelte @@ -0,0 +1,61 @@ +
+

Agent Bootstrap

+

Point any AI coding agent at this cluster and it can bootstrap itself automatically.

+ +
+

For AI Agents

+

Fetch the machine-readable bootstrap document:

+
curl -fsSL https://k8s-portal.viktorbarzin.me/agent
+

This returns a plain-text markdown document with everything an agent needs: setup commands, critical rules, secrets workflow, Terraform conventions, key file paths, and common operations.

+
+ +
+

Usage with Claude Code

+
claude "$(curl -fsSL https://k8s-portal.viktorbarzin.me/agent)" "Deploy a new echo service"
+

Or within a session:

+
    +
  1. Clone the repo: git clone https://github.com/ViktorBarzin/infra.git && cd infra
  2. +
  3. Start Claude Code: claude
  4. +
  5. Claude auto-reads AGENTS.md and .claude/CLAUDE.md from the repo
  6. +
+
+ +
+

Usage with Codex / Other Agents

+
    +
  1. Clone the repo and cd into it
  2. +
  3. Run the setup script: bash <(curl -fsSL https://k8s-portal.viktorbarzin.me/setup/script?os=linux)
  4. +
  5. Start the agent — it will read AGENTS.md for instructions
  6. +
+

If the agent doesn't auto-read AGENTS.md, feed it the bootstrap doc:

+
curl -fsSL https://k8s-portal.viktorbarzin.me/agent
+
+ +
+

What the Agent Gets

+ +
+ +
+

The /agent Endpoint

+

The endpoint is unauthenticated — no login required. Agents can curl or WebFetch it directly without a browser session, just like the setup script.

+

Content-Type: text/plain — no HTML parsing needed.

+
+
+ + diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/agent/+server.ts b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/agent/+server.ts new file mode 100644 index 00000000..3d0fa891 --- /dev/null +++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/agent/+server.ts @@ -0,0 +1,161 @@ +import type { RequestHandler } from './$types'; + +const BOOTSTRAP_DOC = `# Infrastructure Cluster — AI Agent Bootstrap + +> Fetch this document: \`curl -fsSL https://k8s-portal.viktorbarzin.me/agent\` + +## Quick Start + +\`\`\`bash +# 1. Install tools (kubectl, kubelogin, kubeseal) +bash <(curl -fsSL https://k8s-portal.viktorbarzin.me/setup/script?os=linux) + +# 2. Clone the infrastructure repo +git clone https://github.com/ViktorBarzin/infra.git && cd infra + +# 3. Verify cluster access (opens browser for OIDC login on first run) +kubectl get namespaces +\`\`\` + +## Critical Rules (MUST FOLLOW) + +- **ALL changes through Terraform/Terragrunt** — NEVER \`kubectl apply/edit/patch/delete\` for persistent changes. Read-only kubectl is fine. +- **NEVER put secrets in plaintext** — use Sealed Secrets (\`kubeseal\`) or \`secrets.sops.json\` (SOPS-encrypted). +- **NEVER restart NFS on TrueNAS** — causes cluster-wide mount failures across all pods. +- **NEVER commit secrets** — triple-check before every commit. +- **\`[ci skip]\` in commit messages** when changes were already applied locally. +- **Ask before \`git push\`** — always confirm with the user first. + +## Sealed Secrets (Self-Service) + +You can manage your own secrets without SOPS access using \`kubeseal\`: + +\`\`\`bash +# 1. Create a sealed secret +kubectl create secret generic \\ + --from-literal=key=value -n \\ + --dry-run=client -o yaml | \\ + kubeseal --controller-name sealed-secrets \\ + --controller-namespace sealed-secrets -o yaml > sealed-.yaml + +# 2. Place the file in the stack directory: stacks//sealed-.yaml + +# 3. 
Ensure the stack's main.tf has the fileset block (add if missing): +\`\`\` + +\`\`\`hcl +resource "kubernetes_manifest" "sealed_secrets" { + for_each = fileset(path.module, "sealed-*.yaml") + manifest = yamldecode(file("\${path.module}/\${each.value}")) +} +\`\`\` + +\`\`\`bash +# 4. Push to PR — CI runs terragrunt apply — controller decrypts into real K8s Secrets +\`\`\` + +- Files MUST match the \`sealed-*.yaml\` glob pattern. +- Only the in-cluster controller has the private key. \`kubeseal\` uses the public key — safe to distribute. +- The \`kubernetes_manifest\` block is safe to add even with zero sealed-*.yaml files (empty for_each). + +## SOPS Secrets (Admin-Only Fallback) + +For secrets requiring admin access (shared infra passwords, API keys): +- **\`secrets.sops.json\`** — SOPS-encrypted secrets (JSON format) +- **Edit**: \`sops secrets.sops.json\` (opens $EDITOR, re-encrypts on save) +- **Add**: \`sops set secrets.sops.json '["new_key"]' '"value"'\` +- **Operators without SOPS keys**: comment on your PR asking Viktor to add the secret. 
+ +## Terraform Conventions + +### Execution +- **Apply a service**: \`scripts/tg apply --non-interactive\` (auto-decrypts SOPS secrets) +- **Plan**: \`scripts/tg plan --non-interactive\` +- **kubectl**: \`kubectl --kubeconfig $(pwd)/config\` +- **Health check**: \`bash scripts/cluster_healthcheck.sh --quiet\` + +### Key Paths +| Path | Purpose | +|------|---------| +| \`stacks//main.tf\` | Service definition | +| \`stacks/platform/modules//\` | Core infra modules (~22) | +| \`modules/kubernetes/ingress_factory/\` | Standardized ingress (auth, rate limiting, anti-AI) | +| \`modules/kubernetes/nfs_volume/\` | NFS volume module (CSI-backed, soft mount) | +| \`config.tfvars\` | Non-secret configuration (plaintext) | +| \`secrets.sops.json\` | All secrets (SOPS-encrypted JSON) | +| \`scripts/cluster_healthcheck.sh\` | 25-check cluster health script | +| \`AGENTS.md\` | Full AI agent instructions (auto-loaded by most agents) | + +### Tier System +\`0-core\` | \`1-cluster\` | \`2-gpu\` | \`3-edge\` | \`4-aux\` + +Kyverno auto-generates LimitRange + ResourceQuota per namespace based on tier label. +- Containers without explicit \`resources {}\` get default limits (256Mi for edge/aux — causes OOMKill for heavy apps) +- Always set explicit resources on containers that need more than defaults +- Opt-out labels: \`resource-governance/custom-quota=true\` / \`resource-governance/custom-limitrange=true\` + +### Storage +- **NFS** (\`nfs-truenas\` StorageClass): For app data. Use the \`nfs_volume\` module. +- **iSCSI** (\`iscsi-truenas\` StorageClass): For databases (PostgreSQL, MySQL). 
+ +### Shared Variables (never hardcode) +\`var.nfs_server\`, \`var.redis_host\`, \`var.postgresql_host\`, \`var.mysql_host\`, \`var.ollama_host\`, \`var.mail_host\` + +## Architecture + +- Terragrunt-based homelab managing a Kubernetes cluster (5 nodes, v1.34.2) on Proxmox VMs +- 70+ services, each in \`stacks//\` with its own Terraform state +- Core platform: \`stacks/platform/modules/\` (Traefik, Kyverno, monitoring, dbaas, sealed-secrets, etc.) +- Public domain: \`viktorbarzin.me\` (Cloudflare) | Internal: \`viktorbarzin.lan\` (Technitium DNS) +- CI/CD: Woodpecker CI — PRs run plan, merges to master auto-apply platform stack + +## Common Operations + +### Deploy a New Service +1. Copy an existing stack as template: \`cp -r stacks/echo stacks/my-service\` +2. Edit \`main.tf\` — update image, ports, ingress, resources +3. Add DNS in \`config.tfvars\` +4. Apply platform first if needed, then the service + +### Fix Crashed Pods +1. Run \`bash scripts/cluster_healthcheck.sh --quiet\` +2. Safe to delete evicted/failed pods and CrashLoopBackOff pods with >10 restarts +3. OOMKilled? Check \`kubectl describe limitrange tier-defaults -n \` and increase \`resources.limits.memory\` + +### Add a Secret +- **Self-service**: Use \`kubeseal\` (see Sealed Secrets section above) +- **Admin**: \`sops set secrets.sops.json '["key"]' '"value"'\` then commit + +## Contributing Workflow + +1. Create a branch: \`git checkout -b fix/my-change\` +2. Make changes in \`stacks//main.tf\` +3. Push and open a PR: \`git push -u origin fix/my-change\` +4. Viktor reviews and merges +5. 
CI applies automatically — Slack notification when done + +## Infrastructure Details + +- **Proxmox**: 192.168.1.127 (Dell R730, 22c/44t, 142GB RAM) +- **Nodes**: k8s-master (10.0.20.100), node1 (GPU, Tesla T4), node2-4 +- **GPU workloads**: \`node_selector = { "gpu": "true" }\` + toleration \`nvidia.com/gpu\` +- **Pull-through cache**: 10.0.20.10 — use versioned image tags (cache serves stale :latest manifests) +- **MySQL InnoDB Cluster**: 3 instances on iSCSI +- **SMTP**: \`var.mail_host\` port 587 STARTTLS + +## Further Reading + +- Full agent instructions: \`AGENTS.md\` in the repo root +- Patterns and examples: \`.claude/reference/patterns.md\` +- Service catalog: \`.claude/reference/service-catalog.md\` +- Onboarding guide: https://k8s-portal.viktorbarzin.me/onboarding +`; + +export const GET: RequestHandler = async () => { + return new Response(BOOTSTRAP_DOC, { + headers: { + 'Content-Type': 'text/plain; charset=utf-8', + 'Cache-Control': 'public, max-age=3600' + } + }); +}; diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/architecture/+page.svelte b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/architecture/+page.svelte new file mode 100644 index 00000000..2790f074 --- /dev/null +++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/architecture/+page.svelte @@ -0,0 +1,75 @@ +
+

Architecture

+ +
+

Overview

+

The infrastructure runs on a single Dell R730 server (22 CPU cores, 142GB RAM) using Proxmox to manage virtual machines. Five of those VMs form a Kubernetes cluster that runs 70+ services.

+
+Proxmox (Dell R730)
+ ├── k8s-master  (10.0.20.100) — control plane
+ ├── k8s-node1   (10.0.20.101) — GPU node (Tesla T4)
+ ├── k8s-node2   (10.0.20.102) — worker
+ ├── k8s-node3   (10.0.20.103) — worker
+ ├── k8s-node4   (10.0.20.104) — worker
+ ├── TrueNAS     (10.0.10.15)  — storage (NFS + iSCSI)
+ └── pfSense     (10.0.20.1)   — firewall + gateway
+
+ +
+

Networking

+
    +
  • Public domain: viktorbarzin.me — managed by Cloudflare
  • +
  • Internal domain: viktorbarzin.lan — managed by Technitium DNS
  • +
  • Ingress: Cloudflare → Traefik → services
  • +
  • VPN: Headscale (self-hosted Tailscale)
  • +
+
+ +
+

Storage

+
    +
  • NFS (nfs-truenas) — for app data (files, configs, media). Stored on TrueNAS.
  • +
  • iSCSI (iscsi-truenas) — for databases (PostgreSQL, MySQL). Block storage.
  • +
+
+ +
+

Service Tiers

+

Services are organized into tiers that control resource limits and restart priority:

+ + + + + + + + + +
TierExamplesPriority
0-coreTraefik, DNS, VPN, AuthHighest — never evicted
1-clusterRedis, Prometheus, CrowdSecHigh
2-gpuOllama, Immich ML, WhisperMedium
3-edgeNextcloud, Paperless, GrafanaNormal
4-auxDashy, PrivateBin, CyberChefLow — evicted first under pressure
+
+ +
+

Infrastructure as Code

+

Everything is managed with Terraform (via Terragrunt). Each service has its own stack:

+
stacks/
+ ├── platform/       ← core infra (22 modules)
+ ├── url/            ← URL shortener (Shlink)
+ ├── immich/         ← photo library
+ ├── nextcloud/      ← file storage
+ └── ... (70+ more)
+

Changes go through git: branch → PR → review → merge → CI applies automatically.

+
+
+ + diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/contributing/+page.svelte b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/contributing/+page.svelte new file mode 100644 index 00000000..2375a2cb --- /dev/null +++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/contributing/+page.svelte @@ -0,0 +1,115 @@ +
+

How to Contribute

+ +
+

Workflow

+
    +
  1. Create a branch: git checkout -b fix/my-change
  2. +
  3. Make your changes in stacks/<service>/main.tf
  4. +
  5. Push and open a PR: git push -u origin fix/my-change
  6. +
  7. Viktor reviews and merges
  8. +
  9. CI applies automatically — Slack notification when done
  10. +
+
+ +
+

What you CAN change

+
    +
  • Service configurations (image tags, environment variables, resource limits)
  • +
  • New services (add a new stack under stacks/)
  • +
  • Ingress routes, health probes, replica counts
  • +
+
+ +
+

What needs Viktor's review

+
    +
  • CI pipeline changes (.woodpecker/)
  • +
  • Terragrunt configuration (terragrunt.hcl)
  • +
  • Secrets configuration (.sops.yaml)
  • +
  • Core platform modules (stacks/platform/)
  • +
+
+ +
+

NEVER do these

+
+
    +
  • Never kubectl apply/edit/patch — all changes go through Terraform
  • +
  • Never put secrets in code — ask Viktor to add them to the encrypted secrets file
  • +
  • Never restart NFS on TrueNAS — causes cluster-wide mount failures
  • +
  • Never push directly to master — always use a PR
  • +
+
+
+ +
+

Need a new secret?

+

Comment on your PR: "I need a database password for my-service." Viktor will add it to the encrypted secrets file and push to your branch.

+

Then reference it in your Terraform: var.my_service_db_password

+
+ +
+

Namespace Owner Workflow

+

If you are a namespace owner, you can deploy your own apps:

+
    +
  1. Clone the infra repo: git clone https://github.com/ViktorBarzin/infra.git
  2. +
  3. Copy the template: cp -r stacks/_template stacks/your-app
  4. +
  5. Rename: mv stacks/your-app/main.tf.example stacks/your-app/main.tf
  6. +
  7. Edit main.tf — replace all <placeholders>
  8. +
  9. Store secrets in Vault: vault kv put secret/your-username/your-app KEY=value
  10. +
  11. Add your app domain to your domains list in Vault KV
  12. +
  13. Submit a PR, get it reviewed
  14. +
  15. After merge, admin runs terragrunt apply
  16. +
+
+ +
+

CI Pipeline Template

+

Create a .woodpecker.yml in your app's Forgejo repo:

+
{`steps:
+  - name: build
+    image: woodpeckerci/plugin-docker-buildx
+    settings:
+      repo: your-dockerhub-user/myapp
+      tag: ["\${CI_PIPELINE_NUMBER}", "latest"]
+      username:
+        from_secret: dockerhub-username
+      password:
+        from_secret: dockerhub-token
+      platforms: linux/amd64
+
+  - name: deploy
+    image: hashicorp/vault:1.18.1
+    commands:
+      - export VAULT_ADDR=http://vault-active.vault.svc.cluster.local:8200
+      - export VAULT_TOKEN=$(vault write -field=token auth/kubernetes/login
+          role=ci jwt=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token))
+      - KUBE_TOKEN=$(vault write -field=service_account_token
+          kubernetes/creds/YOUR_NAMESPACE-deployer
+          kubernetes_namespace=YOUR_NAMESPACE)
+      - kubectl --server=https://kubernetes.default.svc
+          --token=$KUBE_TOKEN
+          --certificate-authority=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+          -n YOUR_NAMESPACE set image deployment/myapp
+          myapp=your-dockerhub-user/myapp:\${CI_PIPELINE_NUMBER}`}
+
+ +
+

Need a secret for your app?

+

As a namespace owner, you manage your own secrets in Vault:

+
vault kv put secret/your-username/your-app DB_PASSWORD=mysecret API_KEY=abc123
+

Then reference them in your Terraform using a data "vault_kv_secret_v2" block.

+
+
+ + diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/download/+server.ts b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/download/+server.ts new file mode 100644 index 00000000..28981156 --- /dev/null +++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/download/+server.ts @@ -0,0 +1,58 @@ +import type { RequestHandler } from './$types'; +import { readFileSync } from 'fs'; + +const CLUSTER_SERVER = 'https://10.0.20.100:6443'; +const OIDC_ISSUER = 'https://authentik.viktorbarzin.me/application/o/kubernetes/'; +const OIDC_CLIENT_ID = 'kubernetes'; + +export const GET: RequestHandler = async ({ request }) => { + const email = request.headers.get('x-authentik-email') || 'user'; + + // Read CA cert from mounted ConfigMap + let caCert = ''; + try { + caCert = readFileSync('/config/ca.crt', 'utf-8'); + } catch { + // CA cert not available + } + + const caCertBase64 = Buffer.from(caCert).toString('base64'); + const sanitizedEmail = email.replace(/[^a-zA-Z0-9@._-]/g, ''); + + const kubeconfig = `apiVersion: v1 +kind: Config +clusters: +- cluster: + server: ${CLUSTER_SERVER} + certificate-authority-data: ${caCertBase64} + name: home-cluster +contexts: +- context: + cluster: home-cluster + user: oidc-${sanitizedEmail} + name: home-cluster +current-context: home-cluster +users: +- name: oidc-${sanitizedEmail} + user: + exec: + apiVersion: client.authentication.k8s.io/v1beta1 + command: kubectl + args: + - oidc-login + - get-token + - --oidc-issuer-url=${OIDC_ISSUER} + - --oidc-client-id=${OIDC_CLIENT_ID} + - --oidc-extra-scope=email + - --oidc-extra-scope=profile + - --oidc-extra-scope=groups + interactiveMode: IfAvailable +`; + + return new Response(kubeconfig, { + headers: { + 'Content-Type': 'application/yaml', + 'Content-Disposition': `attachment; filename="kubeconfig-home-cluster.yaml"` + } + }); +}; diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/onboarding/+page.svelte 
b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/onboarding/+page.svelte new file mode 100644 index 00000000..812ea24e --- /dev/null +++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/onboarding/+page.svelte @@ -0,0 +1,146 @@ + + +
+

Getting Started

+

Welcome! Follow these steps to get access to the home Kubernetes cluster.

+ + + +
+

Step 0 — Join the VPN

+

The cluster is on a private network (10.0.20.0/24). You need VPN access first.

+
    +
  1. Install Tailscale for your OS
  2. +
  3. Run this in your terminal: +
    tailscale login --login-server https://headscale.viktorbarzin.me
    +
  4. +
  5. A browser window will open with a registration URL
  6. +
  7. Send that URL to Viktor via email (vbarzin@gmail.com) or Slack
  8. +
  9. Wait for approval (usually within a few hours)
  10. +
  11. Once approved, test:
    ping 10.0.20.100
  12. +
+
+ +
+

Step 1 — Log in to the portal

+

Visit k8s-portal.viktorbarzin.me and sign in with your Authentik account.

+

If you don't have an account yet, ask Viktor to create one.

+
+ +
+

Step 2 — Set up kubectl

+

Run one of these commands in your terminal to install everything automatically:

+

macOS

+

Requires Homebrew. Install it first if you don't have it.

+
bash <(curl -fsSL https://k8s-portal.viktorbarzin.me/setup/script?os=mac)
+

Linux

+
bash <(curl -fsSL https://k8s-portal.viktorbarzin.me/setup/script?os=linux)
+

Windows

+

Use WSL2 and follow the Linux instructions.

+
+ + {#if showNamespaceOwner} +
+

Step 3 — Log into Vault

+

Vault manages your secrets and issues dynamic Kubernetes credentials.

+
vault login -method=oidc
+

This opens your browser for Authentik SSO. After login, your token is saved to ~/.vault-token.

+
+ +
+

Step 4 — Verify kubectl access

+

Run this command. It will open your browser for OIDC login the first time:

+
kubectl get pods -n YOUR_NAMESPACE
+

You should see an empty list (no resources) or your running pods.

+
+ +
+

Step 5 — Clone the infra repo

+
git clone https://github.com/ViktorBarzin/infra.git
+cd infra
+

This is where all the infrastructure configuration lives.

+
+ +
+

Step 6 — Create your first app stack

+
    +
  1. Copy the template:
    cp -r stacks/_template stacks/myapp
    +mv stacks/myapp/main.tf.example stacks/myapp/main.tf
  2. +
  3. Edit stacks/myapp/main.tf — replace all <placeholders>
  4. +
  5. Store secrets in Vault: +
    vault kv put secret/YOUR_USERNAME/myapp DB_PASSWORD=secret123
    +
  6. +
  7. Add your app domain to the domains list in Vault KV k8s_users
  8. +
  9. Submit a PR: +
    git checkout -b feat/myapp
    +git add stacks/myapp/
    +git commit -m "add myapp stack"
    +git push -u origin feat/myapp
    +
  10. +
  11. Viktor reviews and merges
  12. +
  13. After merge: cd stacks/myapp && terragrunt apply
  14. +
+
+ {:else} +
+

Step 3 — Verify access

+

Run this command. It will open your browser for login the first time:

+
kubectl get namespaces
+

You should see output like:

+
NAME              STATUS   AGE
+default           Active   200d
+kube-system       Active   200d
+monitoring        Active   200d
+...
+

If you get a connection error, make sure your VPN is connected (tailscale status).

+
+ +
+

Step 4 — Clone the repo

+
git clone https://github.com/ViktorBarzin/infra.git
+cd infra
+

This is where all the infrastructure configuration lives.

+
+ +
+

Step 5 — Install your AI assistant (optional)

+

Install Codex CLI for AI-assisted cluster management:

+
npm install -g @openai/codex
+

Codex reads the AGENTS.md file in the repo and knows how to work with the cluster.

+
+ +
+

Step 6 — Your first change

+
    +
  1. Create a branch:
    git checkout -b my-first-change
  2. +
  3. Edit a service file (e.g., change an image tag in stacks/echo/main.tf)
  4. +
  5. Commit and push:
    git add . && git commit -m "my first change" && git push -u origin my-first-change
  6. +
  7. Open a Pull Request on GitHub
  8. +
  9. Viktor reviews and merges
  10. +
  11. Woodpecker CI automatically applies the change to the cluster
  12. +
  13. Slack notification confirms it worked
  14. +
+
+ {/if} +
+ + diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/services/+page.svelte b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/services/+page.svelte new file mode 100644 index 00000000..e9648bde --- /dev/null +++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/services/+page.svelte @@ -0,0 +1,58 @@ +
+

Service Catalog

+

70+ services running on the cluster. Here are the most commonly used:

+ +
+

Core Services

+ + + + + + + + +
| Service | URL | Description |
| Grafana | grafana.viktorbarzin.me | Monitoring dashboards |
| Uptime Kuma | uptime.viktorbarzin.me | Service uptime monitoring |
| Authentik | authentik.viktorbarzin.me | Identity provider (SSO) |
| Woodpecker CI | ci.viktorbarzin.me | CI/CD pipeline |
+
+ +
+

User-Facing Services

+ + + + + + + + + + + +
| Service | URL | Description |
| Nextcloud | nextcloud.viktorbarzin.me | File storage, calendar, contacts |
| Immich | immich.viktorbarzin.me | Photo library (Google Photos alternative) |
| Vaultwarden | vault.viktorbarzin.me | Password manager |
| Paperless-ngx | pdf.viktorbarzin.me | Document management |
| Navidrome | music.viktorbarzin.me | Music streaming |
| Tandoor | recipes.viktorbarzin.me | Recipe manager |
| Linkwarden | bookmarks.viktorbarzin.me | Bookmark manager |
+
+ +
+

Developer Tools

+ + + + + + + + + +
| Service | URL | Description |
| Forgejo | forgejo.viktorbarzin.me | Git server (Gitea fork) |
| CyberChef | cyberchef.viktorbarzin.me | Data transformation tool |
| Excalidraw | draw.viktorbarzin.me | Whiteboard drawing |
| PrivateBin | paste.viktorbarzin.me | Encrypted paste bin |
| JSON Crack | jsoncrack.viktorbarzin.me | JSON visualizer |
+
+
+ + diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/setup/+page.svelte b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/setup/+page.svelte new file mode 100644 index 00000000..520681d4 --- /dev/null +++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/setup/+page.svelte @@ -0,0 +1,69 @@ +
+

Setup Instructions

+ +
+

Quick Setup (one command)

+

Run this in your terminal to install everything and configure kubectl automatically:

+

macOS

+
bash <(curl -fsSL https://k8s-portal.viktorbarzin.me/setup/script?os=mac)
+

Linux

+
bash <(curl -fsSL https://k8s-portal.viktorbarzin.me/setup/script?os=linux)
+
+ +
+

Manual Setup

+ +

1. Install kubectl

+

macOS

+
brew install kubectl
+

Linux

+
curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
+chmod +x kubectl && sudo mv kubectl /usr/local/bin/
+ +

2. Install kubelogin (OIDC plugin)

+

macOS

+
brew install int128/kubelogin/kubelogin
+

Linux

+
curl -LO https://github.com/int128/kubelogin/releases/latest/download/kubelogin_linux_amd64.zip
+unzip kubelogin_linux_amd64.zip && sudo mv kubelogin /usr/local/bin/kubectl-oidc_login
+rm kubelogin_linux_amd64.zip
+ +

3. Download and use your kubeconfig

+
+mkdir -p ~/.kube
+
+# Download from the portal (requires auth cookie from browser)
+# Or use the download button on the portal homepage
+
+# Set the KUBECONFIG environment variable
+export KUBECONFIG=~/.kube/config-home
+
+# Test access (opens browser for login)
+kubectl get namespaces
+		
+
+ +

← Back to portal

+
+ + diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/setup/script/+server.ts b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/setup/script/+server.ts new file mode 100644 index 00000000..82419194 --- /dev/null +++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/setup/script/+server.ts @@ -0,0 +1,266 @@ +import type { RequestHandler } from './$types'; +import { readFileSync } from 'fs'; + +const CLUSTER_SERVER = 'https://10.0.20.100:6443'; +const OIDC_ISSUER = 'https://authentik.viktorbarzin.me/application/o/kubernetes/'; +const OIDC_CLIENT_ID = 'kubernetes'; + +export const GET: RequestHandler = async ({ url }) => { + const os = url.searchParams.get('os') || 'mac'; + + let caCert = ''; + try { + caCert = readFileSync('/config/ca.crt', 'utf-8'); + } catch { + // CA cert not available + } + const caCertBase64 = Buffer.from(caCert).toString('base64'); + + const kubeconfigContent = `apiVersion: v1 +kind: Config +clusters: +- cluster: + server: ${CLUSTER_SERVER} + certificate-authority-data: ${caCertBase64} + name: home-cluster +contexts: +- context: + cluster: home-cluster + user: oidc-user + name: home-cluster +current-context: home-cluster +users: +- name: oidc-user + user: + exec: + apiVersion: client.authentication.k8s.io/v1beta1 + command: kubectl + args: + - oidc-login + - get-token + - --oidc-issuer-url=${OIDC_ISSUER} + - --oidc-client-id=${OIDC_CLIENT_ID} + - --oidc-extra-scope=email + - --oidc-extra-scope=profile + - --oidc-extra-scope=groups + interactiveMode: IfAvailable`; + + let script: string; + + if (os === 'linux') { + script = `#!/bin/bash +set -e + +echo "=== Kubernetes Cluster Setup ===" +echo "" + +# Use sudo if available, otherwise install directly (e.g. in containers running as root) +SUDO="" +if [ "$(id -u)" -ne 0 ] && command -v sudo &>/dev/null; then + SUDO="sudo" +fi + +# Determine install directory +INSTALL_DIR="/usr/local/bin" +if [ ! 
-w "\$INSTALL_DIR" ] && [ -z "\$SUDO" ]; then + INSTALL_DIR="\$HOME/.local/bin" + mkdir -p "\$INSTALL_DIR" + export PATH="\$INSTALL_DIR:\$PATH" +fi + +# Install kubectl +if command -v kubectl &>/dev/null; then + echo "[OK] kubectl already installed" +else + echo "[..] Installing kubectl..." + KUBECTL_VERSION=\$(curl -L -s https://dl.k8s.io/release/stable.txt) + curl -fsSLO "https://dl.k8s.io/release/\${KUBECTL_VERSION}/bin/linux/amd64/kubectl" + chmod +x kubectl && \$SUDO mv kubectl "\$INSTALL_DIR/" + echo "[OK] kubectl installed" +fi + +# Install kubelogin +if command -v kubectl-oidc_login &>/dev/null; then + echo "[OK] kubelogin already installed" +else + echo "[..] Installing kubelogin..." + KUBELOGIN_VERSION=\$(curl -fsSL -o /dev/null -w "%{url_effective}" https://github.com/int128/kubelogin/releases/latest | grep -o '[^/]*\$') + curl -fsSLO "https://github.com/int128/kubelogin/releases/download/\${KUBELOGIN_VERSION}/kubelogin_linux_amd64.zip" + unzip -o kubelogin_linux_amd64.zip kubelogin -d /tmp + \$SUDO mv /tmp/kubelogin "\$INSTALL_DIR/kubectl-oidc_login" + rm -f kubelogin_linux_amd64.zip + echo "[OK] kubelogin installed" +fi + +# Install kubeseal +if command -v kubeseal &>/dev/null; then + echo "[OK] kubeseal already installed" +else + echo "[..] Installing kubeseal..." + KUBESEAL_VERSION=\$(curl -fsSL -o /dev/null -w "%{url_effective}" https://github.com/bitnami-labs/sealed-secrets/releases/latest | grep -o '[^/]*\$') + curl -fsSLO "https://github.com/bitnami-labs/sealed-secrets/releases/download/\${KUBESEAL_VERSION}/kubeseal-\${KUBESEAL_VERSION#v}-linux-amd64.tar.gz" + tar -xzf "kubeseal-\${KUBESEAL_VERSION#v}-linux-amd64.tar.gz" kubeseal + \$SUDO mv kubeseal "\$INSTALL_DIR/" + rm -f "kubeseal-\${KUBESEAL_VERSION#v}-linux-amd64.tar.gz" + echo "[OK] kubeseal installed" +fi + +# Install Vault CLI +if command -v vault &>/dev/null; then + echo "[OK] vault already installed" +else + echo "[..] Installing Vault CLI..." 
+ VAULT_VERSION="1.18.1" + curl -fsSLO "https://releases.hashicorp.com/vault/\${VAULT_VERSION}/vault_\${VAULT_VERSION}_linux_amd64.zip" + unzip -o "vault_\${VAULT_VERSION}_linux_amd64.zip" vault -d /tmp + \$SUDO mv /tmp/vault "\$INSTALL_DIR/" + rm -f "vault_\${VAULT_VERSION}_linux_amd64.zip" + echo "[OK] vault installed" +fi + +# Install Terragrunt +if command -v terragrunt &>/dev/null; then + echo "[OK] terragrunt already installed" +else + echo "[..] Installing terragrunt..." + TG_VERSION=\$(curl -fsSL -o /dev/null -w "%{url_effective}" https://github.com/gruntwork-io/terragrunt/releases/latest | grep -o '[^/]*\$') + curl -fsSLO "https://github.com/gruntwork-io/terragrunt/releases/download/\${TG_VERSION}/terragrunt_linux_amd64" + chmod +x terragrunt_linux_amd64 + \$SUDO mv terragrunt_linux_amd64 "\$INSTALL_DIR/terragrunt" + echo "[OK] terragrunt installed" +fi + +# Install Terraform +if command -v terraform &>/dev/null; then + echo "[OK] terraform already installed" +else + echo "[..] Installing terraform..." + TF_VERSION="1.9.8" + curl -fsSLO "https://releases.hashicorp.com/terraform/\${TF_VERSION}/terraform_\${TF_VERSION}_linux_amd64.zip" + unzip -o "terraform_\${TF_VERSION}_linux_amd64.zip" terraform -d /tmp + \$SUDO mv /tmp/terraform "\$INSTALL_DIR/" + rm -f "terraform_\${TF_VERSION}_linux_amd64.zip" + echo "[OK] terraform installed" +fi + +# Write kubeconfig +mkdir -p ~/.kube +cat > ~/.kube/config-home << 'KUBECONFIG_EOF' +${kubeconfigContent} +KUBECONFIG_EOF +echo "[OK] Kubeconfig written to ~/.kube/config-home" + +# Add KUBECONFIG to shell profile +SHELL_RC=~/.bashrc +[ -f ~/.zshrc ] && SHELL_RC=~/.zshrc +if ! grep -q 'config-home' "\$SHELL_RC" 2>/dev/null; then + echo 'export KUBECONFIG=~/.kube/config-home' >> "\$SHELL_RC" + echo "[OK] Added KUBECONFIG to \$SHELL_RC" +fi +export KUBECONFIG=~/.kube/config-home + +echo "" +echo "=== Setup complete! ===" +echo "" +echo "Run 'kubectl get namespaces' to test (opens browser for login)." 
+echo "You may need to restart your shell or run: export KUBECONFIG=~/.kube/config-home" +`; + } else { + script = `#!/bin/bash +set -e + +echo "=== Kubernetes Cluster Setup ===" +echo "" + +# Check for Homebrew +if ! command -v brew &>/dev/null; then + echo "[!!] Homebrew not found. Install it first:" + echo ' /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"' + exit 1 +fi + +# Install kubectl +if command -v kubectl &>/dev/null; then + echo "[OK] kubectl already installed ($(kubectl version --client -o json 2>/dev/null | grep -o '"gitVersion":"[^"]*"' | cut -d'"' -f4))" +else + echo "[..] Installing kubectl..." + brew install kubectl + echo "[OK] kubectl installed" +fi + +# Install kubelogin +if command -v kubectl-oidc_login &>/dev/null; then + echo "[OK] kubelogin already installed" +else + echo "[..] Installing kubelogin..." + brew install int128/kubelogin/kubelogin + echo "[OK] kubelogin installed" +fi + +# Install kubeseal +if command -v kubeseal &>/dev/null; then + echo "[OK] kubeseal already installed" +else + echo "[..] Installing kubeseal..." + brew install kubeseal + echo "[OK] kubeseal installed" +fi + +# Install Vault CLI +if command -v vault &>/dev/null; then + echo "[OK] vault already installed" +else + echo "[..] Installing Vault CLI..." + brew tap hashicorp/tap + brew install hashicorp/tap/vault + echo "[OK] vault installed" +fi + +# Install Terragrunt +if command -v terragrunt &>/dev/null; then + echo "[OK] terragrunt already installed" +else + echo "[..] Installing terragrunt..." + brew install terragrunt + echo "[OK] terragrunt installed" +fi + +# Install Terraform +if command -v terraform &>/dev/null; then + echo "[OK] terraform already installed" +else + echo "[..] Installing terraform..." 
+ brew install hashicorp/tap/terraform + echo "[OK] terraform installed" +fi + +# Write kubeconfig +mkdir -p ~/.kube +cat > ~/.kube/config-home << 'KUBECONFIG_EOF' +${kubeconfigContent} +KUBECONFIG_EOF +echo "[OK] Kubeconfig written to ~/.kube/config-home" + +# Add KUBECONFIG to shell profile +SHELL_RC=~/.zshrc +[ ! -f ~/.zshrc ] && SHELL_RC=~/.bashrc +if ! grep -q 'config-home' "\$SHELL_RC" 2>/dev/null; then + echo 'export KUBECONFIG=~/.kube/config-home' >> "\$SHELL_RC" + echo "[OK] Added KUBECONFIG to \$SHELL_RC" +fi +export KUBECONFIG=~/.kube/config-home + +echo "" +echo "=== Setup complete! ===" +echo "" +echo "Run 'kubectl get namespaces' to test (opens browser for login)." +echo "You may need to restart your shell or run: export KUBECONFIG=~/.kube/config-home" +`; + } + + return new Response(script, { + headers: { + 'Content-Type': 'text/plain; charset=utf-8' + } + }); +}; diff --git a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/troubleshooting/+page.svelte b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/troubleshooting/+page.svelte new file mode 100644 index 00000000..17ac2e5a --- /dev/null +++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/troubleshooting/+page.svelte @@ -0,0 +1,63 @@ +
+

Troubleshooting

+ +
+

"kubectl can't connect to the server"

+
    +
  1. Check your VPN: tailscale status — should show "connected"
  2. +
  3. Check KUBECONFIG: echo $KUBECONFIG — should be ~/.kube/config-home
  4. +
  5. Test connectivity: ping 10.0.20.100
  6. +
  7. If ping works but kubectl doesn't, re-run the setup script
  8. +
+
+ +
+

"Forbidden" or "Permission denied"

+

You may not have access to that namespace. Your access is scoped to specific namespaces.

+

Try: kubectl get namespaces to see which namespaces you can access.

+

Need access to another namespace? Ask Viktor.

+
+ +
+

"Pod is CrashLoopBackOff"

+
    +
  1. Check pod logs: kubectl logs -n <namespace> <pod-name> --tail=50
  2. +
  3. Check previous crash: kubectl logs -n <namespace> <pod-name> --previous
  4. +
  5. Check events: kubectl describe pod -n <namespace> <pod-name>
  6. +
  7. Common causes: OOMKilled (need more memory), bad config, database connection failure
  8. +
+
+ +
+

"PR CI failed"

+
    +
  1. Check the Woodpecker CI dashboard: ci.viktorbarzin.me
  2. +
  3. Read the build logs — the error is usually at the bottom
  4. +
  5. Fix the issue, commit, and push — CI will re-run
  6. +
+
+ +
+

"I need a new secret / database password"

+

Secrets are managed by Viktor in an encrypted file. You cannot add them yourself.

+
    +
  1. Comment on your PR: "Need DB password for <service>"
  2. +
  3. Viktor adds the secret and pushes to your branch
  4. +
  5. Reference it as var.<service>_db_password in your Terraform
  6. +
+
+ +
+

Still stuck?

+

Email Viktor at vbarzin@gmail.com or message on Slack.

+
+
+ + diff --git a/stacks/k8s-portal/modules/k8s-portal/files/static/robots.txt b/stacks/k8s-portal/modules/k8s-portal/files/static/robots.txt new file mode 100644 index 00000000..b6dd6670 --- /dev/null +++ b/stacks/k8s-portal/modules/k8s-portal/files/static/robots.txt @@ -0,0 +1,3 @@ +# allow crawling everything by default +User-agent: * +Disallow: diff --git a/stacks/k8s-portal/modules/k8s-portal/files/svelte.config.js b/stacks/k8s-portal/modules/k8s-portal/files/svelte.config.js new file mode 100644 index 00000000..6bfb3c40 --- /dev/null +++ b/stacks/k8s-portal/modules/k8s-portal/files/svelte.config.js @@ -0,0 +1,10 @@ +import adapter from '@sveltejs/adapter-node'; + +/** @type {import('@sveltejs/kit').Config} */ +const config = { + kit: { + adapter: adapter() + } +}; + +export default config; diff --git a/stacks/k8s-portal/modules/k8s-portal/files/tsconfig.json b/stacks/k8s-portal/modules/k8s-portal/files/tsconfig.json new file mode 100644 index 00000000..2c2ed3c4 --- /dev/null +++ b/stacks/k8s-portal/modules/k8s-portal/files/tsconfig.json @@ -0,0 +1,20 @@ +{ + "extends": "./.svelte-kit/tsconfig.json", + "compilerOptions": { + "rewriteRelativeImportExtensions": true, + "allowJs": true, + "checkJs": true, + "esModuleInterop": true, + "forceConsistentCasingInFileNames": true, + "resolveJsonModule": true, + "skipLibCheck": true, + "sourceMap": true, + "strict": true, + "moduleResolution": "bundler" + } + // Path aliases are handled by https://svelte.dev/docs/kit/configuration#alias + // except $lib which is handled by https://svelte.dev/docs/kit/configuration#files + // + // To make changes to top-level options such as include and exclude, we recommend extending + // the generated config; see https://svelte.dev/docs/kit/configuration#typescript +} diff --git a/stacks/k8s-portal/modules/k8s-portal/files/vite.config.ts b/stacks/k8s-portal/modules/k8s-portal/files/vite.config.ts new file mode 100644 index 00000000..bbf8c7da --- /dev/null +++ 
b/stacks/k8s-portal/modules/k8s-portal/files/vite.config.ts @@ -0,0 +1,6 @@ +import { sveltekit } from '@sveltejs/kit/vite'; +import { defineConfig } from 'vite'; + +export default defineConfig({ + plugins: [sveltekit()] +}); diff --git a/stacks/k8s-portal/modules/k8s-portal/main.tf b/stacks/k8s-portal/modules/k8s-portal/main.tf new file mode 100644 index 00000000..53c375d6 --- /dev/null +++ b/stacks/k8s-portal/modules/k8s-portal/main.tf @@ -0,0 +1,166 @@ +variable "tls_secret_name" {} +variable "tier" { type = string } +variable "k8s_ca_cert" { + type = string + default = "" +} + +resource "kubernetes_namespace" "k8s_portal" { + metadata { + name = "k8s-portal" + labels = { + tier = var.tier + } + } +} + +module "tls_secret" { + source = "../../../../modules/kubernetes/setup_tls_secret" + namespace = kubernetes_namespace.k8s_portal.metadata[0].name + tls_secret_name = var.tls_secret_name +} + +resource "kubernetes_config_map" "k8s_portal_config" { + metadata { + name = "k8s-portal-config" + namespace = kubernetes_namespace.k8s_portal.metadata[0].name + } + + data = { + "ca.crt" = var.k8s_ca_cert + } +} + +resource "kubernetes_deployment" "k8s_portal" { + metadata { + name = "k8s-portal" + namespace = kubernetes_namespace.k8s_portal.metadata[0].name + labels = { + app = "k8s-portal" + tier = var.tier + } + } + + spec { + replicas = 1 + strategy { + type = "Recreate" + } + revision_history_limit = 3 + selector { + match_labels = { + app = "k8s-portal" + } + } + + template { + metadata { + labels = { + app = "k8s-portal" + } + } + + spec { + container { + name = "portal" + image = "viktorbarzin/k8s-portal:latest" + port { + container_port = 3000 + } + + volume_mount { + name = "config" + mount_path = "/config/ca.crt" + sub_path = "ca.crt" + read_only = true + } + volume_mount { + name = "user-roles" + mount_path = "/config/users.json" + sub_path = "users.json" + read_only = true + } + resources { + requests = { + cpu = "10m" + memory = "128Mi" + } + limits = { + 
memory = "128Mi" + } + } + } + + volume { + name = "config" + config_map { + name = kubernetes_config_map.k8s_portal_config.metadata[0].name + } + } + volume { + name = "user-roles" + config_map { + name = "k8s-user-roles" + } + } + dns_config { + option { + name = "ndots" + value = "2" + } + } + } + } + } + lifecycle { + ignore_changes = [ + spec[0].template[0].spec[0].dns_config, + spec[0].template[0].spec[0].container[0].image, # CI updates image tag + ] + } +} + +resource "kubernetes_service" "k8s_portal" { + metadata { + name = "k8s-portal" + namespace = kubernetes_namespace.k8s_portal.metadata[0].name + } + + spec { + selector = { + app = "k8s-portal" + } + port { + port = 80 + target_port = 3000 + } + } +} + +module "ingress" { + source = "../../../../modules/kubernetes/ingress_factory" + namespace = kubernetes_namespace.k8s_portal.metadata[0].name + name = "k8s-portal" + tls_secret_name = var.tls_secret_name + protected = true # Require Authentik login + extra_annotations = { + "gethomepage.dev/enabled" = "true" + "gethomepage.dev/name" = "K8s Portal" + "gethomepage.dev/description" = "Kubernetes portal" + "gethomepage.dev/icon" = "kubernetes.png" + "gethomepage.dev/group" = "Core Platform" + "gethomepage.dev/pod-selector" = "" + } +} + +# Unprotected ingress for the setup script and agent endpoint (needs to be curl-able without auth) +module "ingress_setup_script" { + source = "../../../../modules/kubernetes/ingress_factory" + namespace = kubernetes_namespace.k8s_portal.metadata[0].name + name = "k8s-portal-setup" + host = "k8s-portal" + service_name = "k8s-portal" + ingress_path = ["/setup/script", "/agent"] + tls_secret_name = var.tls_secret_name + protected = false +} diff --git a/stacks/k8s-portal/secrets b/stacks/k8s-portal/secrets new file mode 120000 index 00000000..ca54a7cf --- /dev/null +++ b/stacks/k8s-portal/secrets @@ -0,0 +1 @@ +../../secrets \ No newline at end of file diff --git a/stacks/k8s-portal/terragrunt.hcl 
b/stacks/k8s-portal/terragrunt.hcl new file mode 100644 index 00000000..4f16dddf --- /dev/null +++ b/stacks/k8s-portal/terragrunt.hcl @@ -0,0 +1,8 @@ +include "root" { + path = find_in_parent_folders() +} + +dependency "infra" { + config_path = "../infra" + skip_outputs = true +} diff --git a/stacks/k8s-portal/tiers.tf b/stacks/k8s-portal/tiers.tf new file mode 100644 index 00000000..eb0f8083 --- /dev/null +++ b/stacks/k8s-portal/tiers.tf @@ -0,0 +1,10 @@ +# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa +locals { + tiers = { + core = "0-core" + cluster = "1-cluster" + gpu = "2-gpu" + edge = "3-edge" + aux = "4-aux" + } +} diff --git a/stacks/metallb/main.tf b/stacks/metallb/main.tf new file mode 100644 index 00000000..54d21f69 --- /dev/null +++ b/stacks/metallb/main.tf @@ -0,0 +1,4 @@ +module "metallb" { + source = "./modules/metallb" + tier = local.tiers.core +} diff --git a/stacks/metallb/modules/metallb/main.tf b/stacks/metallb/modules/metallb/main.tf new file mode 100644 index 00000000..1659f08e --- /dev/null +++ b/stacks/metallb/modules/metallb/main.tf @@ -0,0 +1,40 @@ +# Creates namespace and everythin needed +# Do not use until https://github.com/colinwilson/terraform-kubernetes-metallb/issues/5 is solved +# module "metallb" { +# source = "colinwilson/metallb/kubernetes" +# version = "0.1.7" +# } +variable "tier" { type = string } + +resource "kubernetes_namespace" "metallb" { + metadata { + name = "metallb-system" + labels = { + app = "metallb" + # "istio-injection" : "disabled" + # tier = var.tier + } + } +} + +module "metallb" { + source = "ViktorBarzin/metallb/kubernetes" + version = "0.1.5" + depends_on = [kubernetes_namespace.metallb] +} + +resource "kubernetes_config_map" "config" { + metadata { + name = "config" + namespace = kubernetes_namespace.metallb.metadata[0].name + } + data = { + config = < user if user.role == "admin" }) + + metadata { + name = "oidc-admin-${each.key}" + } + + role_ref { + api_group = "rbac.authorization.k8s.io" + kind = 
"ClusterRole" + name = "cluster-admin" + } + + subject { + kind = "User" + name = each.value.email + api_group = "rbac.authorization.k8s.io" + } +} + +# --- Power-user role --- +# Can manage workloads cluster-wide but cannot modify RBAC, nodes, or persistent volumes + +resource "kubernetes_cluster_role" "power_user" { + metadata { + name = "oidc-power-user" + } + + # Core resources + rule { + api_groups = [""] + resources = ["pods", "pods/log", "pods/exec", "services", "endpoints", "configmaps", "secrets", "persistentvolumeclaims", "events", "namespaces"] + verbs = ["get", "list", "watch"] + } + + rule { + api_groups = [""] + resources = ["pods", "services", "configmaps", "secrets", "persistentvolumeclaims"] + verbs = ["create", "update", "patch", "delete"] + } + + # Apps + rule { + api_groups = ["apps"] + resources = ["deployments", "statefulsets", "daemonsets", "replicasets"] + verbs = ["get", "list", "watch", "create", "update", "patch", "delete"] + } + + # Batch + rule { + api_groups = ["batch"] + resources = ["jobs", "cronjobs"] + verbs = ["get", "list", "watch", "create", "update", "patch", "delete"] + } + + # Networking + rule { + api_groups = ["networking.k8s.io"] + resources = ["ingresses", "networkpolicies"] + verbs = ["get", "list", "watch", "create", "update", "patch", "delete"] + } + + # Autoscaling + rule { + api_groups = ["autoscaling"] + resources = ["horizontalpodautoscalers"] + verbs = ["get", "list", "watch", "create", "update", "patch", "delete"] + } + + # Read-only on cluster-level resources + rule { + api_groups = [""] + resources = ["nodes"] + verbs = ["get", "list", "watch"] + } + + rule { + api_groups = ["storage.k8s.io"] + resources = ["storageclasses"] + verbs = ["get", "list", "watch"] + } + + rule { + api_groups = ["rbac.authorization.k8s.io"] + resources = ["clusterroles", "clusterrolebindings", "roles", "rolebindings"] + verbs = ["get", "list", "watch"] + } +} + +resource "kubernetes_cluster_role_binding" "power_users" { + for_each = 
nonsensitive({ for name, user in var.k8s_users : name => user if user.role == "power-user" }) + + metadata { + name = "oidc-power-user-${each.key}" + } + + role_ref { + api_group = "rbac.authorization.k8s.io" + kind = "ClusterRole" + name = kubernetes_cluster_role.power_user.metadata[0].name + } + + subject { + kind = "User" + name = each.value.email + api_group = "rbac.authorization.k8s.io" + } +} + +# --- Namespace-owner role --- +# Full admin within assigned namespaces + read-only cluster-wide + +locals { + # Flatten user->namespace pairs for iteration + namespace_owner_pairs = flatten([ + for name, user in var.k8s_users : [ + for ns in user.namespaces : { + user_key = name + namespace = ns + email = user.email + quota = user.quota + } + ] if user.role == "namespace-owner" + ]) +} + +resource "kubernetes_role_binding" "namespace_owner" { + for_each = nonsensitive({ for pair in local.namespace_owner_pairs : "${pair.user_key}-${pair.namespace}" => pair }) + + metadata { + name = "namespace-owner-${each.value.user_key}" + namespace = each.value.namespace + } + + role_ref { + api_group = "rbac.authorization.k8s.io" + kind = "ClusterRole" + name = "admin" # Built-in ClusterRole with full namespace access + } + + subject { + kind = "User" + name = each.value.email + api_group = "rbac.authorization.k8s.io" + } +} + +# Read-only cluster-wide access for namespace owners +resource "kubernetes_cluster_role" "namespace_owner_readonly" { + metadata { + name = "oidc-namespace-owner-readonly" + } + + rule { + api_groups = [""] + resources = ["namespaces", "nodes"] + verbs = ["get", "list", "watch"] + } + + rule { + api_groups = [""] + resources = ["pods", "services", "configmaps", "events"] + verbs = ["get", "list", "watch"] + } + + rule { + api_groups = ["apps"] + resources = ["deployments", "statefulsets", "daemonsets"] + verbs = ["get", "list", "watch"] + } +} + +resource "kubernetes_cluster_role_binding" "namespace_owner_readonly" { + for_each = nonsensitive({ for name, 
user in var.k8s_users : name => user if user.role == "namespace-owner" }) + + metadata { + name = "oidc-ns-owner-readonly-${each.key}" + } + + role_ref { + api_group = "rbac.authorization.k8s.io" + kind = "ClusterRole" + name = kubernetes_cluster_role.namespace_owner_readonly.metadata[0].name + } + + subject { + kind = "User" + name = each.value.email + api_group = "rbac.authorization.k8s.io" + } +} + +# Resource quotas per user namespace +resource "kubernetes_resource_quota" "user_namespace_quota" { + for_each = nonsensitive({ for pair in local.namespace_owner_pairs : "${pair.user_key}-${pair.namespace}" => pair }) + + metadata { + name = "user-quota" + namespace = each.value.namespace + } + + spec { + hard = { + "requests.cpu" = each.value.quota.cpu_requests + "requests.memory" = each.value.quota.memory_requests + "limits.memory" = each.value.quota.memory_limits + "pods" = each.value.quota.pods + } + } + + depends_on = [kubernetes_role_binding.namespace_owner] +} + +# ConfigMap with user-role mapping for the self-service portal +resource "kubernetes_config_map" "user_roles" { + metadata { + name = "k8s-user-roles" + namespace = "k8s-portal" + } + + data = { + "users.json" = jsonencode({ + for name, user in var.k8s_users : user.email => { + role = user.role + namespaces = user.namespaces + } + }) + } +} + +# TLS secret in each user namespace (so they can create HTTPS ingresses) +module "user_namespace_tls" { + for_each = nonsensitive(toset(flatten([ + for name, user in var.k8s_users : user.namespaces + if user.role == "namespace-owner" + ]))) + + source = "../../../../modules/kubernetes/setup_tls_secret" + namespace = each.value + tls_secret_name = var.tls_secret_name +} diff --git a/stacks/rbac/secrets b/stacks/rbac/secrets new file mode 120000 index 00000000..ca54a7cf --- /dev/null +++ b/stacks/rbac/secrets @@ -0,0 +1 @@ +../../secrets \ No newline at end of file diff --git a/stacks/rbac/terragrunt.hcl b/stacks/rbac/terragrunt.hcl new file mode 100644 index 
00000000..4f16dddf --- /dev/null +++ b/stacks/rbac/terragrunt.hcl @@ -0,0 +1,8 @@ +include "root" { + path = find_in_parent_folders() +} + +dependency "infra" { + config_path = "../infra" + skip_outputs = true +} diff --git a/stacks/rbac/tiers.tf b/stacks/rbac/tiers.tf new file mode 100644 index 00000000..eb0f8083 --- /dev/null +++ b/stacks/rbac/tiers.tf @@ -0,0 +1,10 @@ +# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa +locals { + tiers = { + core = "0-core" + cluster = "1-cluster" + gpu = "2-gpu" + edge = "3-edge" + aux = "4-aux" + } +} diff --git a/stacks/redis/main.tf b/stacks/redis/main.tf new file mode 100644 index 00000000..d6e7c8f9 --- /dev/null +++ b/stacks/redis/main.tf @@ -0,0 +1,9 @@ +variable "tls_secret_name" { type = string } +variable "nfs_server" { type = string } + +module "redis" { + source = "./modules/redis" + tls_secret_name = var.tls_secret_name + nfs_server = var.nfs_server + tier = local.tiers.cluster +} diff --git a/stacks/redis/modules/redis/main.tf b/stacks/redis/modules/redis/main.tf new file mode 100644 index 00000000..5f5c5966 --- /dev/null +++ b/stacks/redis/modules/redis/main.tf @@ -0,0 +1,310 @@ +variable "tls_secret_name" {} +variable "tier" { type = string } +variable "nfs_server" { type = string } + +resource "kubernetes_namespace" "redis" { + metadata { + name = "redis" + labels = { + tier = var.tier + } + } +} + +module "tls_secret" { + source = "../../../../modules/kubernetes/setup_tls_secret" + namespace = kubernetes_namespace.redis.metadata[0].name + tls_secret_name = var.tls_secret_name +} + +# Redis with Sentinel HA via Bitnami Helm chart +# Architecture: 1 master + 1 replica + 2 sentinels (one per node) +# Sentinel automatically promotes a replica if master fails +# HAProxy sits in front and routes only to the current master (see below) +resource "helm_release" "redis" { + namespace = kubernetes_namespace.redis.metadata[0].name + create_namespace = false + name = "redis" + atomic = true + timeout = 600 + + repository = 
"oci://10.0.20.10:5000/bitnamicharts" + chart = "redis" + version = "25.3.2" + + values = [yamlencode({ + architecture = "replication" + + auth = { + enabled = false + } + + sentinel = { + enabled = true + quorum = 2 + masterSet = "mymaster" + automateCluster = true + + resources = { + requests = { + cpu = "50m" + memory = "64Mi" + } + limits = { + memory = "64Mi" + } + } + } + + master = { + persistence = { + enabled = true + storageClass = "iscsi-truenas" + size = "2Gi" + } + + resources = { + requests = { + cpu = "100m" + memory = "64Mi" + } + limits = { + memory = "64Mi" + } + } + } + + replica = { + replicaCount = 2 + + persistence = { + enabled = true + storageClass = "iscsi-truenas" + size = "2Gi" + } + + resources = { + requests = { + cpu = "50m" + memory = "64Mi" + } + limits = { + memory = "64Mi" + } + } + } + + # Metrics for Prometheus + metrics = { + enabled = false + } + + # Use the existing service name so clients don't need changes + # Sentinel-enabled Bitnami chart creates a headless service + # and a regular service pointing at the master + nameOverride = "redis" + })] +} + +# HAProxy-based master-only proxy for simple redis:// clients. +# Health-checks each Redis node via INFO replication and only routes +# to the current master. On Sentinel failover, HAProxy detects the +# new master within seconds via its health check interval. +# Previously this was a K8s Service that routed to all nodes, causing +# READONLY errors when clients hit a replica. 
+ +resource "kubernetes_config_map" "haproxy" { + metadata { + name = "redis-haproxy" + namespace = kubernetes_namespace.redis.metadata[0].name + } + data = { + "haproxy.cfg" = <<-EOT + global + maxconn 256 + + defaults + mode tcp + timeout connect 5s + timeout client 30s + timeout server 30s + timeout check 3s + + frontend redis_front + bind *:6379 + default_backend redis_master + + frontend sentinel_front + bind *:26379 + default_backend redis_sentinel + + backend redis_master + option tcp-check + tcp-check connect + tcp-check send "PING\r\n" + tcp-check expect string +PONG + tcp-check send "INFO replication\r\n" + tcp-check expect string role:master + tcp-check send "QUIT\r\n" + tcp-check expect string +OK + server redis-node-0 redis-node-0.redis-headless.redis.svc.cluster.local:6379 check inter 3s fall 3 rise 2 + server redis-node-1 redis-node-1.redis-headless.redis.svc.cluster.local:6379 check inter 3s fall 3 rise 2 + + backend redis_sentinel + balance roundrobin + server redis-node-0 redis-node-0.redis-headless.redis.svc.cluster.local:26379 check inter 5s + server redis-node-1 redis-node-1.redis-headless.redis.svc.cluster.local:26379 check inter 5s + EOT + } +} + +resource "kubernetes_deployment" "haproxy" { + metadata { + name = "redis-haproxy" + namespace = kubernetes_namespace.redis.metadata[0].name + labels = { + app = "redis-haproxy" + } + } + spec { + replicas = 2 + selector { + match_labels = { + app = "redis-haproxy" + } + } + template { + metadata { + labels = { + app = "redis-haproxy" + } + } + spec { + container { + name = "haproxy" + image = "docker.io/library/haproxy:3.1-alpine" + port { + container_port = 6379 + name = "redis" + } + port { + container_port = 26379 + name = "sentinel" + } + volume_mount { + name = "config" + mount_path = "/usr/local/etc/haproxy" + read_only = true + } + resources { + requests = { + cpu = "10m" + memory = "16Mi" + } + limits = { + memory = "16Mi" + } + } + liveness_probe { + tcp_socket { + port = 6379 + } + 
initial_delay_seconds = 5 + period_seconds = 10 + } + } + volume { + name = "config" + config_map { + name = kubernetes_config_map.haproxy.metadata[0].name + } + } + } + } + } + + depends_on = [helm_release.redis] +} + +resource "kubernetes_service" "redis" { + metadata { + name = "redis" + namespace = kubernetes_namespace.redis.metadata[0].name + } + spec { + selector = { + app = "redis-haproxy" + } + port { + name = "tcp-redis" + port = 6379 + target_port = 6379 + } + port { + name = "tcp-sentinel" + port = 26379 + target_port = 26379 + } + } + + depends_on = [kubernetes_deployment.haproxy] +} + +module "nfs_backup" { + source = "../../../../modules/kubernetes/nfs_volume" + name = "redis-backup" + namespace = kubernetes_namespace.redis.metadata[0].name + nfs_server = var.nfs_server + nfs_path = "/mnt/main/redis-backup" +} + +# Hourly backup: copy RDB snapshot from master to NFS +resource "kubernetes_cron_job_v1" "redis-backup" { + metadata { + name = "redis-backup" + namespace = kubernetes_namespace.redis.metadata[0].name + } + spec { + concurrency_policy = "Replace" + failed_jobs_history_limit = 3 + schedule = "0 * * * *" + starting_deadline_seconds = 10 + successful_jobs_history_limit = 3 + job_template { + metadata {} + spec { + backoff_limit = 2 + ttl_seconds_after_finished = 60 + template { + metadata {} + spec { + container { + name = "redis-backup" + image = "redis:7-alpine" + command = ["/bin/sh", "-c", <<-EOT + set -eux + # Trigger a fresh RDB save on the master + redis-cli -h redis.redis BGSAVE + sleep 5 + # Copy the RDB via redis-cli --rdb + redis-cli -h redis.redis --rdb /backup/dump.rdb + echo "Backup complete: $(ls -lh /backup/dump.rdb)" + EOT + ] + volume_mount { + name = "backup" + mount_path = "/backup" + } + } + volume { + name = "backup" + persistent_volume_claim { + claim_name = module.nfs_backup.claim_name + } + } + } + } + } + } + } +} diff --git a/stacks/redis/secrets b/stacks/redis/secrets new file mode 120000 index 00000000..ca54a7cf --- 
/dev/null +++ b/stacks/redis/secrets @@ -0,0 +1 @@ +../../secrets \ No newline at end of file diff --git a/stacks/redis/terragrunt.hcl b/stacks/redis/terragrunt.hcl new file mode 100644 index 00000000..4f16dddf --- /dev/null +++ b/stacks/redis/terragrunt.hcl @@ -0,0 +1,8 @@ +include "root" { + path = find_in_parent_folders() +} + +dependency "infra" { + config_path = "../infra" + skip_outputs = true +} diff --git a/stacks/redis/tiers.tf b/stacks/redis/tiers.tf new file mode 100644 index 00000000..eb0f8083 --- /dev/null +++ b/stacks/redis/tiers.tf @@ -0,0 +1,10 @@ +# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa +locals { + tiers = { + core = "0-core" + cluster = "1-cluster" + gpu = "2-gpu" + edge = "3-edge" + aux = "4-aux" + } +} diff --git a/stacks/reverse-proxy/main.tf b/stacks/reverse-proxy/main.tf new file mode 100644 index 00000000..2416792a --- /dev/null +++ b/stacks/reverse-proxy/main.tf @@ -0,0 +1,18 @@ +variable "tls_secret_name" { type = string } + +data "vault_kv_secret_v2" "secrets" { + mount = "secret" + name = "platform" +} + +locals { + homepage_credentials = jsondecode(data.vault_kv_secret_v2.secrets.data["homepage_credentials"]) +} + +module "reverse-proxy" { + source = "./modules/reverse_proxy" + tls_secret_name = var.tls_secret_name + truenas_homepage_token = local.homepage_credentials["reverse_proxy"]["truenas_token"] + pfsense_homepage_token = local.homepage_credentials["reverse_proxy"]["pfsense_token"] + haos_homepage_token = try(local.homepage_credentials["home_assistant"]["token"], "") +} diff --git a/stacks/reverse-proxy/modules/reverse_proxy/factory/main.tf b/stacks/reverse-proxy/modules/reverse_proxy/factory/main.tf new file mode 100644 index 00000000..1af42844 --- /dev/null +++ b/stacks/reverse-proxy/modules/reverse_proxy/factory/main.tf @@ -0,0 +1,163 @@ +variable "name" {} +variable "namespace" { + default = "reverse-proxy" +} +variable "external_name" {} +variable "port" { + default = "80" +} +variable "tls_secret_name" {} +variable 
"backend_protocol" { + default = "HTTP" +} +variable "protected" { + type = bool + default = true +} +variable "ingress_path" { + type = list(string) + default = ["/"] +} +variable "max_body_size" { + type = string + default = "50m" +} +variable "extra_annotations" { + default = {} +} +variable "rybbit_site_id" { + default = null + type = string +} +variable "custom_content_security_policy" { + default = null + type = string +} +variable "strip_auth_headers" { + type = bool + default = false +} +variable "extra_middlewares" { + type = list(string) + default = [] +} + + +resource "kubernetes_service" "proxied-service" { + metadata { + name = var.name + namespace = var.namespace + labels = { + "app" = var.name + } + } + + spec { + type = "ExternalName" + external_name = var.external_name + + port { + name = var.backend_protocol == "HTTPS" ? "https-${var.name}" : "${var.name}-web" + port = var.port + protocol = "TCP" + target_port = var.port + } + } +} + +resource "kubernetes_ingress_v1" "proxied-ingress" { + metadata { + name = var.name + namespace = var.namespace + annotations = merge({ + "traefik.ingress.kubernetes.io/router.middlewares" = join(",", compact(concat([ + "traefik-rate-limit@kubernetescrd", + var.custom_content_security_policy == null ? "traefik-csp-headers@kubernetescrd" : null, + "traefik-crowdsec@kubernetescrd", + var.protected ? "traefik-authentik-forward-auth@kubernetescrd" : null, + var.strip_auth_headers ? "traefik-strip-auth-headers@kubernetescrd" : null, + var.rybbit_site_id != null ? "traefik-strip-accept-encoding@kubernetescrd" : null, + var.rybbit_site_id != null ? "${var.namespace}-rybbit-analytics-${var.name}@kubernetescrd" : null, + var.custom_content_security_policy != null ? "${var.namespace}-custom-csp-${var.name}@kubernetescrd" : null, + ], var.extra_middlewares))) + "traefik.ingress.kubernetes.io/router.entrypoints" = "websecure" + "traefik.ingress.kubernetes.io/service.serversscheme" = var.backend_protocol == "HTTPS" ? 
"https" : null + "traefik.ingress.kubernetes.io/service.serverstransport" = var.backend_protocol == "HTTPS" ? "traefik-insecure-skip-verify@kubernetescrd" : null + }, var.extra_annotations) + } + + spec { + ingress_class_name = "traefik" + tls { + hosts = ["${var.name}.viktorbarzin.me"] + secret_name = var.tls_secret_name + } + rule { + host = "${var.name}.viktorbarzin.me" + http { + dynamic "path" { + for_each = var.ingress_path + + content { + path = path.value + backend { + service { + + name = var.name + port { + number = var.port + } + } + } + } + } + } + } + } +} + +# Rybbit analytics middleware (rewrite-body plugin with content-type filtering) - created per service when rybbit_site_id is set +resource "kubernetes_manifest" "rybbit_analytics" { + count = var.rybbit_site_id != null ? 1 : 0 + + manifest = { + apiVersion = "traefik.io/v1alpha1" + kind = "Middleware" + metadata = { + name = "rybbit-analytics-${var.name}" + namespace = var.namespace + } + spec = { + plugin = { + rewrite-body = { + rewrites = [{ + regex = "" + replacement = "" + }] + monitoring = { + types = ["text/html"] + } + } + } + } + } +} + +# Custom CSP headers middleware - created per service when custom_content_security_policy is set +resource "kubernetes_manifest" "custom_csp" { + count = var.custom_content_security_policy != null ? 
1 : 0 + + manifest = { + apiVersion = "traefik.io/v1alpha1" + kind = "Middleware" + metadata = { + name = "custom-csp-${var.name}" + namespace = var.namespace + } + spec = { + headers = { + contentSecurityPolicy = var.custom_content_security_policy + } + } + } +} diff --git a/stacks/reverse-proxy/modules/reverse_proxy/main.tf b/stacks/reverse-proxy/modules/reverse_proxy/main.tf new file mode 100644 index 00000000..e6dcc34b --- /dev/null +++ b/stacks/reverse-proxy/modules/reverse_proxy/main.tf @@ -0,0 +1,356 @@ +# Reverse proxy for things in my infra that are +# outside of K8S but would be nice to use the Nginx-ingress + +variable "tls_secret_name" {} +variable "truenas_homepage_token" {} +variable "pfsense_homepage_token" {} +variable "haos_homepage_token" { + type = string + default = "" + sensitive = true +} + +resource "kubernetes_namespace" "reverse-proxy" { + metadata { + name = "reverse-proxy" + } +} + +module "tls_secret" { + source = "../../../../modules/kubernetes/setup_tls_secret" + namespace = "reverse-proxy" + tls_secret_name = var.tls_secret_name + depends_on = [kubernetes_namespace.reverse-proxy] +} + +# https://pfsense.viktorbarzin.me/ +module "pfsense" { + source = "./factory" + name = "pfsense" + external_name = "pfsense.viktorbarzin.lan" + tls_secret_name = var.tls_secret_name + port = 443 + backend_protocol = "HTTPS" + + extra_annotations = { + "gethomepage.dev/enabled" : "true" + "gethomepage.dev/description" : "Cluster Firewall" + "gethomepage.dev/group" : "Identity & Security" + "gethomepage.dev/icon" : "pfsense.png" + "gethomepage.dev/name" : "pFsense" + "gethomepage.dev/widget.type" : "pfsense" + "gethomepage.dev/widget.version" : "2" + "gethomepage.dev/widget.url" : "https://10.0.20.1" + "gethomepage.dev/widget.username" : "admin" + "gethomepage.dev/widget.password" : var.pfsense_homepage_token + "gethomepage.dev/widget.fields" = "[\"load\", \"memory\", \"temp\", \"disk\"]" + "gethomepage.dev/widget.wan" = "vtnet0" + } + depends_on = 
[kubernetes_namespace.reverse-proxy] + rybbit_site_id = "b029580e5a7c" +} + +# https://nas.viktorbarzin.me/ +module "nas" { + source = "./factory" + name = "nas" + external_name = "nas.viktorbarzin.lan" + port = 5001 + tls_secret_name = var.tls_secret_name + backend_protocol = "HTTPS" + max_body_size = "0m" + depends_on = [kubernetes_namespace.reverse-proxy] + rybbit_site_id = "1e11f8449f7d" + extra_annotations = { + "gethomepage.dev/enabled" = "true" + "gethomepage.dev/name" = "Synology NAS" + "gethomepage.dev/description" = "Network storage" + "gethomepage.dev/icon" = "synology.png" + "gethomepage.dev/group" = "Infrastructure" + "gethomepage.dev/pod-selector" = "" + } +} + +# https://files.viktorbarzin.me/ +module "nas-files" { + source = "./factory" + name = "files" + external_name = "nas.viktorbarzin.lan" + port = 5001 + tls_secret_name = var.tls_secret_name + backend_protocol = "HTTPS" + protected = false # allow anyone to download files + ingress_path = ["/sharing", "/scripts", "/webman", "/wfmlogindialog.js", "/fsdownload"] + max_body_size = "0m" + depends_on = [kubernetes_namespace.reverse-proxy] + extra_annotations = { "gethomepage.dev/enabled" = "false" } +} + +# https://idrac.viktorbarzin.me/ +module "idrac" { + source = "./factory" + name = "idrac" + external_name = "idrac.viktorbarzin.lan" + port = 443 + tls_secret_name = var.tls_secret_name + backend_protocol = "HTTPS" + strip_auth_headers = true + extra_annotations = { + "gethomepage.dev/enabled" = "true" + "gethomepage.dev/name" = "iDRAC" + "gethomepage.dev/description" = "Server management" + "gethomepage.dev/icon" = "dell.png" + "gethomepage.dev/group" = "Infrastructure" + "gethomepage.dev/pod-selector" = "" + } + depends_on = [kubernetes_namespace.reverse-proxy] +} + +# Can either listen on https or http; can't do both :/ +# TODO: Not working yet +module "tp-link-gateway" { + source = "./factory" + name = "gw" + external_name = "gw.viktorbarzin.lan" + port = 443 + tls_secret_name = 
var.tls_secret_name + backend_protocol = "HTTPS" + depends_on = [kubernetes_namespace.reverse-proxy] + protected = true + strip_auth_headers = true + extra_annotations = { "gethomepage.dev/enabled" = "false" } +} + +# https://truenas.viktorbarzin.me/ +module "truenas" { + source = "./factory" + name = "truenas" + external_name = "truenas.viktorbarzin.lan" + port = 80 + tls_secret_name = var.tls_secret_name + max_body_size = "0m" + + extra_annotations = { + "gethomepage.dev/enabled" : "true" + "gethomepage.dev/description" : "TrueNAS" + "gethomepage.dev/group" : "Infrastructure" + "gethomepage.dev/icon" : "truenas.png" + "gethomepage.dev/name" : "TrueNAS" + "gethomepage.dev/widget.type" : "truenas" + "gethomepage.dev/widget.url" : "https://truenas.viktorbarzin.lan" + "gethomepage.dev/widget.key" : var.truenas_homepage_token + # "gethomepage.dev/widget.enablePools" : "true" + # "gethomepage.dev/pod-selector" : "" + } + depends_on = [kubernetes_namespace.reverse-proxy] + rybbit_site_id = "b66fbd3cb58a" +} + +# https://r730.viktorbarzin.me/ +module "r730" { + source = "./factory" + name = "r730" + external_name = "r730.viktorbarzin.lan" + port = 443 + tls_secret_name = var.tls_secret_name + backend_protocol = "HTTPS" + depends_on = [kubernetes_namespace.reverse-proxy] + extra_annotations = { + "gethomepage.dev/enabled" = "true" + "gethomepage.dev/name" = "R730" + "gethomepage.dev/description" = "Dell PowerEdge server" + "gethomepage.dev/icon" = "dell.png" + "gethomepage.dev/group" = "Infrastructure" + "gethomepage.dev/pod-selector" = "" + } +} + +# https://proxmox.viktorbarzin.me/ +module "proxmox" { + source = "./factory" + name = "proxmox" + external_name = "proxmox.viktorbarzin.lan" + port = 8006 + tls_secret_name = var.tls_secret_name + backend_protocol = "HTTPS" + max_body_size = "0" # unlimited + depends_on = [kubernetes_namespace.reverse-proxy] + rybbit_site_id = "190a7ad3e1c7" + extra_annotations = { + "gethomepage.dev/enabled" = "true" + "gethomepage.dev/name" 
= "Proxmox" + "gethomepage.dev/description" = "Hypervisor" + "gethomepage.dev/icon" = "proxmox.png" + "gethomepage.dev/group" = "Infrastructure" + "gethomepage.dev/pod-selector" = "" + } +} + +# https://registry.viktorbarzin.me/ +module "docker-registry-ui" { + source = "./factory" + name = "registry" + external_name = "docker-registry.viktorbarzin.lan" + port = 8080 + tls_secret_name = var.tls_secret_name + depends_on = [kubernetes_namespace.reverse-proxy] + extra_annotations = { + # Override middleware chain to remove rate-limit; the UI fires many API calls to list repos/tags + "traefik.ingress.kubernetes.io/router.middlewares" = "traefik-csp-headers@kubernetescrd,traefik-crowdsec@kubernetescrd,traefik-authentik-forward-auth@kubernetescrd" + "gethomepage.dev/enabled" = "true" + "gethomepage.dev/name" = "Docker Registry" + "gethomepage.dev/description" = "Container registry" + "gethomepage.dev/icon" = "docker.png" + "gethomepage.dev/group" = "Infrastructure" + "gethomepage.dev/pod-selector" = "" + } +} + +# https://valchedrym.viktorbarzin.me/ +module "valchedrym" { + source = "./factory" + name = "valchedrym" + external_name = "valchedrym.viktorbarzin.lan" + tls_secret_name = var.tls_secret_name + port = 80 + backend_protocol = "HTTP" + depends_on = [kubernetes_namespace.reverse-proxy] + extra_annotations = { "gethomepage.dev/enabled" = "false" } +} + +# https://ip150.viktorbarzin.me/ +# Server has funky behaviour based on headers; works on some browsers not others...
+# module "valchedrym-ip150" { +# source = "./factory" +# name = "ip150" +# # external_name = "valchedrym.ddns.net" +# external_name = "192.168.0.10" +# port = 80 +# backend_protocol = "HTTP" +# use_proxy_protocol = false +# tls_secret_name = var.tls_secret_name +# protected = false +# depends_on = [kubernetes_namespace.reverse-proxy] +# } + +# https://mladost3.viktorbarzin.me/ +module "mladost3" { + source = "./factory" + name = "mladost3" + external_name = "mladost3.ddns.net" + port = 8080 + tls_secret_name = var.tls_secret_name + depends_on = [kubernetes_namespace.reverse-proxy] + extra_annotations = { "gethomepage.dev/enabled" = "false" } +} + +# # https://server-switch.viktorbarzin.me/ +# module "server-switch" { +# source = "./factory" +# name = "server-switch" +# external_name = "server-switch.viktorbarzin.lan" +# port = 80 +# tls_secret_name = var.tls_secret_name +# depends_on = [kubernetes_namespace.reverse-proxy] +# } + +# https://ha-sofia.viktorbarzin.me/ +module "ha-sofia" { + source = "./factory" + name = "ha-sofia" + external_name = "ha-sofia.viktorbarzin.lan" + port = 8123 + tls_secret_name = var.tls_secret_name + depends_on = [kubernetes_namespace.reverse-proxy] + protected = false + rybbit_site_id = "590fc392690a" + extra_annotations = { + "gethomepage.dev/enabled" = "true" + "gethomepage.dev/name" = "Home Assistant Sofia" + "gethomepage.dev/description" = "Smart home hub" + "gethomepage.dev/icon" = "home-assistant.png" + "gethomepage.dev/group" = "Smart Home" + "gethomepage.dev/pod-selector" = "" + } +} + +# https://ha-london.viktorbarzin.me/ +module "ha-london" { + source = "./factory" + name = "ha-london" + external_name = "ha-london.viktorbarzin.lan" + port = 8123 + tls_secret_name = var.tls_secret_name + depends_on = [kubernetes_namespace.reverse-proxy] + protected = false + extra_annotations = { + "gethomepage.dev/enabled" = "true" + "gethomepage.dev/name" = "Home Assistant London" + "gethomepage.dev/description" = "Smart home hub" + 
"gethomepage.dev/icon" = "home-assistant.png" + "gethomepage.dev/group" = "Smart Home" + "gethomepage.dev/pod-selector" = "" + } +} + +# https://london.viktorbarzin.me/ +module "london" { + source = "./factory" + name = "london" + external_name = "openwrt-london.viktorbarzin.lan" + port = 443 + tls_secret_name = var.tls_secret_name + backend_protocol = "HTTPS" + protected = true + depends_on = [kubernetes_namespace.reverse-proxy] + extra_annotations = { + "gethomepage.dev/enabled" : "false" + "gethomepage.dev/description" : "OpenWRT London" + # gethomepage.dev/group: Media + "gethomepage.dev/icon" : "openwrt.png" + "gethomepage.dev/name" : "OpenWRT London" + "gethomepage.dev/widget.type" : "openwrt" + "gethomepage.dev/widget.url" : "https://100.64.0.14" + # "gethomepage.dev/widget.token" = var.homepage_token + "gethomepage.dev/widget.username" : "homepage" + "gethomepage.dev/widget.password" : "" # add later as Flint2's openwrt is a little odd + "gethomepage.dev/pod-selector" : "" + } +} +module "pi-lights" { + source = "./factory" + name = "pi" + external_name = "ha-london.viktorbarzin.lan" + port = 5000 + tls_secret_name = var.tls_secret_name + protected = true + depends_on = [kubernetes_namespace.reverse-proxy] + extra_annotations = { "gethomepage.dev/enabled" = "false" } +} + +# module "ups" { # .NET app doesn't work well behind host +# source = "./factory" +# name = "ups" +# external_name = "ups.viktorbarzin.lan" +# backend_protocol = "HTTPS" +# port = 443 +# tls_secret_name = var.tls_secret_name +# # protected = true +# protected = false +# depends_on = [kubernetes_namespace.reverse-proxy] +# extra_annotations = { +# "nginx.ingress.kubernetes.io/upstream-vhost" : "", +# # "nginx.ingress.kubernetes.io/proxy-set-header" : "Host: <>", +# } +# } + +module "mbp14" { + source = "./factory" + name = "mbp14" + external_name = "mbp14.viktorbarzin.lan" + port = 4020 + tls_secret_name = var.tls_secret_name + protected = true + depends_on = 
[kubernetes_namespace.reverse-proxy] + extra_annotations = { "gethomepage.dev/enabled" = "false" } +} diff --git a/stacks/reverse-proxy/secrets b/stacks/reverse-proxy/secrets new file mode 120000 index 00000000..ca54a7cf --- /dev/null +++ b/stacks/reverse-proxy/secrets @@ -0,0 +1 @@ +../../secrets \ No newline at end of file diff --git a/stacks/reverse-proxy/terragrunt.hcl b/stacks/reverse-proxy/terragrunt.hcl new file mode 100644 index 00000000..4f16dddf --- /dev/null +++ b/stacks/reverse-proxy/terragrunt.hcl @@ -0,0 +1,8 @@ +include "root" { + path = find_in_parent_folders() +} + +dependency "infra" { + config_path = "../infra" + skip_outputs = true +} diff --git a/stacks/reverse-proxy/tiers.tf b/stacks/reverse-proxy/tiers.tf new file mode 100644 index 00000000..eb0f8083 --- /dev/null +++ b/stacks/reverse-proxy/tiers.tf @@ -0,0 +1,10 @@ +# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa +locals { + tiers = { + core = "0-core" + cluster = "1-cluster" + gpu = "2-gpu" + edge = "3-edge" + aux = "4-aux" + } +} diff --git a/stacks/sealed-secrets/main.tf b/stacks/sealed-secrets/main.tf new file mode 100644 index 00000000..fad73b0e --- /dev/null +++ b/stacks/sealed-secrets/main.tf @@ -0,0 +1,4 @@ +module "sealed-secrets" { + source = "./modules/sealed-secrets" + tier = local.tiers.cluster +} diff --git a/stacks/sealed-secrets/modules/sealed-secrets/main.tf b/stacks/sealed-secrets/modules/sealed-secrets/main.tf new file mode 100644 index 00000000..2175e5d8 --- /dev/null +++ b/stacks/sealed-secrets/modules/sealed-secrets/main.tf @@ -0,0 +1,45 @@ +variable "tier" { type = string } + +# ----------------------------------------------------------------------------- +# Namespace +# ----------------------------------------------------------------------------- +resource "kubernetes_namespace" "sealed_secrets" { + metadata { + name = "sealed-secrets" + labels = { + tier = var.tier + } + } +} + +# ----------------------------------------------------------------------------- +# 
Sealed Secrets — encrypts secrets for safe git storage +# https://github.com/bitnami-labs/sealed-secrets +# ----------------------------------------------------------------------------- +resource "helm_release" "sealed_secrets" { + namespace = kubernetes_namespace.sealed_secrets.metadata[0].name + create_namespace = false + name = "sealed-secrets" + atomic = true + timeout = 300 + + repository = "https://bitnami-labs.github.io/sealed-secrets" + chart = "sealed-secrets" + version = "2.18.3" + + values = [yamlencode({ + crds = { + create = true + } + + resources = { + requests = { + cpu = "50m" + memory = "192Mi" + } + limits = { + memory = "192Mi" + } + } + })] +} diff --git a/stacks/sealed-secrets/secrets b/stacks/sealed-secrets/secrets new file mode 120000 index 00000000..ca54a7cf --- /dev/null +++ b/stacks/sealed-secrets/secrets @@ -0,0 +1 @@ +../../secrets \ No newline at end of file diff --git a/stacks/sealed-secrets/terragrunt.hcl b/stacks/sealed-secrets/terragrunt.hcl new file mode 100644 index 00000000..4f16dddf --- /dev/null +++ b/stacks/sealed-secrets/terragrunt.hcl @@ -0,0 +1,8 @@ +include "root" { + path = find_in_parent_folders() +} + +dependency "infra" { + config_path = "../infra" + skip_outputs = true +} diff --git a/stacks/sealed-secrets/tiers.tf b/stacks/sealed-secrets/tiers.tf new file mode 100644 index 00000000..eb0f8083 --- /dev/null +++ b/stacks/sealed-secrets/tiers.tf @@ -0,0 +1,10 @@ +# Generated by Terragrunt. 
Sig: nIlQXj57tbuaRZEa +locals { + tiers = { + core = "0-core" + cluster = "1-cluster" + gpu = "2-gpu" + edge = "3-edge" + aux = "4-aux" + } +} diff --git a/stacks/technitium/main.tf b/stacks/technitium/main.tf new file mode 100644 index 00000000..751d1159 --- /dev/null +++ b/stacks/technitium/main.tf @@ -0,0 +1,24 @@ +variable "tls_secret_name" { type = string } +variable "nfs_server" { type = string } +variable "mysql_host" { type = string } + +data "vault_kv_secret_v2" "secrets" { + mount = "secret" + name = "platform" +} + +locals { + homepage_credentials = jsondecode(data.vault_kv_secret_v2.secrets.data["homepage_credentials"]) +} + +module "technitium" { + source = "./modules/technitium" + tls_secret_name = var.tls_secret_name + nfs_server = var.nfs_server + mysql_host = var.mysql_host + homepage_token = local.homepage_credentials["technitium"]["token"] + technitium_db_password = data.vault_kv_secret_v2.secrets.data["technitium_db_password"] + technitium_username = data.vault_kv_secret_v2.secrets.data["technitium_username"] + technitium_password = data.vault_kv_secret_v2.secrets.data["technitium_password"] + tier = local.tiers.core +} diff --git a/stacks/technitium/modules/technitium/dashboards/technitium-dns.json b/stacks/technitium/modules/technitium/dashboards/technitium-dns.json new file mode 100644 index 00000000..b0b17c37 --- /dev/null +++ b/stacks/technitium/modules/technitium/dashboards/technitium-dns.json @@ -0,0 +1,488 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { "type": "datasource", "uid": "grafana" }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Technitium DNS query logs from MySQL", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "title": "Total Queries", + "type": "stat", + "datasource": { "type": "mysql", "uid": "technitium-mysql" 
}, + "gridPos": { "h": 4, "w": 4, "x": 0, "y": 0 }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "steps": [ + { "color": "green", "value": null } + ] + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "textMode": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + }, + "targets": [ + { + "rawSql": "SELECT COUNT(*) as total_queries FROM dns_logs WHERE $__timeFilter(timestamp)", + "format": "table", + "refId": "A" + } + ] + }, + { + "title": "Cached %", + "type": "stat", + "datasource": { "type": "mysql", "uid": "technitium-mysql" }, + "gridPos": { "h": 4, "w": 4, "x": 4, "y": 0 }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "unit": "percentunit", + "thresholds": { + "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 0.3 }, + { "color": "green", "value": 0.5 } + ] + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "textMode": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + }, + "targets": [ + { + "rawSql": "SELECT SUM(CASE WHEN response_type = 3 THEN 1 ELSE 0 END) / COUNT(*) as cached_pct FROM dns_logs WHERE $__timeFilter(timestamp)", + "format": "table", + "refId": "A" + } + ] + }, + { + "title": "Blocked %", + "type": "stat", + "datasource": { "type": "mysql", "uid": "technitium-mysql" }, + "gridPos": { "h": 4, "w": 4, "x": 8, "y": 0 }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "unit": "percentunit", + "thresholds": { + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 0.1 }, + { "color": "red", "value": 0.3 } + ] + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "textMode": "auto", + "reduceOptions": { "calcs": 
["lastNotNull"], "fields": "", "values": false } + }, + "targets": [ + { + "rawSql": "SELECT SUM(CASE WHEN response_type = 4 THEN 1 ELSE 0 END) / COUNT(*) as blocked_pct FROM dns_logs WHERE $__timeFilter(timestamp)", + "format": "table", + "refId": "A" + } + ] + }, + { + "title": "NxDomain %", + "type": "stat", + "datasource": { "type": "mysql", "uid": "technitium-mysql" }, + "gridPos": { "h": 4, "w": 4, "x": 12, "y": 0 }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "unit": "percentunit", + "thresholds": { + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 0.2 }, + { "color": "red", "value": 0.5 } + ] + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "textMode": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + }, + "targets": [ + { + "rawSql": "SELECT SUM(CASE WHEN rcode = 3 THEN 1 ELSE 0 END) / COUNT(*) as nxdomain_pct FROM dns_logs WHERE $__timeFilter(timestamp)", + "format": "table", + "refId": "A" + } + ] + }, + { + "title": "Avg Response Time", + "type": "stat", + "datasource": { "type": "mysql", "uid": "technitium-mysql" }, + "gridPos": { "h": 4, "w": 4, "x": 16, "y": 0 }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "unit": "ms", + "thresholds": { + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 50 }, + { "color": "red", "value": 200 } + ] + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "textMode": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + }, + "targets": [ + { + "rawSql": "SELECT AVG(response_rtt) as avg_rtt_ms FROM dns_logs WHERE $__timeFilter(timestamp) AND response_rtt IS NOT NULL", + "format": "table", + "refId": "A" + } + ] + }, + { + "title": "Queries by Protocol", + "type": "stat", + "datasource": { 
"type": "mysql", "uid": "technitium-mysql" }, + "gridPos": { "h": 4, "w": 4, "x": 20, "y": 0 }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" } + }, + "overrides": [] + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "textMode": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": true } + }, + "targets": [ + { + "rawSql": "SELECT SUM(CASE WHEN protocol = 0 THEN 1 ELSE 0 END) as UDP, SUM(CASE WHEN protocol = 1 THEN 1 ELSE 0 END) as TCP, SUM(CASE WHEN protocol = 3 THEN 1 ELSE 0 END) as DoH, SUM(CASE WHEN protocol = 4 THEN 1 ELSE 0 END) as DoT FROM dns_logs WHERE $__timeFilter(timestamp)", + "format": "table", + "refId": "A" + } + ] + }, + { + "title": "Queries Over Time", + "type": "timeseries", + "datasource": { "type": "mysql", "uid": "technitium-mysql" }, + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 4 }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 50, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "normal" } + } + }, + "overrides": [] + }, + "options": { + "legend": { "calcs": ["sum"], "displayMode": "list", "placement": "bottom" }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "rawSql": "SELECT $__timeGroup(timestamp, $__interval) as time, SUM(CASE WHEN response_type = 1 THEN 1 ELSE 0 END) as Authoritative, SUM(CASE WHEN response_type = 2 THEN 1 ELSE 0 END) as Recursive, SUM(CASE WHEN response_type = 3 THEN 1 ELSE 0 END) as Cached, SUM(CASE WHEN response_type = 4 THEN 1 ELSE 0 END) as 
Blocked, SUM(CASE WHEN response_type = 5 THEN 1 ELSE 0 END) as Dropped FROM dns_logs WHERE $__timeFilter(timestamp) GROUP BY time ORDER BY time", + "format": "time_series", + "refId": "A" + } + ] + }, + { + "title": "Response Codes", + "type": "piechart", + "datasource": { "type": "mysql", "uid": "technitium-mysql" }, + "gridPos": { "h": 8, "w": 8, "x": 0, "y": 12 }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "NOERROR" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "NXDOMAIN" }, "properties": [{ "id": "color", "value": { "fixedColor": "yellow", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "SERVFAIL" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "REFUSED" }, "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] } + ] + }, + "options": { + "legend": { "displayMode": "table", "placement": "right", "values": ["value", "percent"] }, + "pieType": "donut", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": true }, + "tooltip": { "mode": "single" } + }, + "targets": [ + { + "rawSql": "SELECT SUM(CASE WHEN rcode = 0 THEN 1 ELSE 0 END) as NOERROR, SUM(CASE WHEN rcode = 2 THEN 1 ELSE 0 END) as SERVFAIL, SUM(CASE WHEN rcode = 3 THEN 1 ELSE 0 END) as NXDOMAIN, SUM(CASE WHEN rcode = 5 THEN 1 ELSE 0 END) as REFUSED, SUM(CASE WHEN rcode NOT IN (0,2,3,5) THEN 1 ELSE 0 END) as Other FROM dns_logs WHERE $__timeFilter(timestamp)", + "format": "table", + "refId": "A" + } + ] + }, + { + "title": "Response Types", + "type": "piechart", + "datasource": { "type": "mysql", "uid": "technitium-mysql" }, + "gridPos": { "h": 8, "w": 8, "x": 8, "y": 12 }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" } + }, + 
"overrides": [ + { "matcher": { "id": "byName", "options": "Cached" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "Blocked" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "Recursive" }, "properties": [{ "id": "color", "value": { "fixedColor": "blue", "mode": "fixed" } }] }, + { "matcher": { "id": "byName", "options": "Authoritative" }, "properties": [{ "id": "color", "value": { "fixedColor": "purple", "mode": "fixed" } }] } + ] + }, + "options": { + "legend": { "displayMode": "table", "placement": "right", "values": ["value", "percent"] }, + "pieType": "donut", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": true }, + "tooltip": { "mode": "single" } + }, + "targets": [ + { + "rawSql": "SELECT SUM(CASE WHEN response_type = 1 THEN 1 ELSE 0 END) as Authoritative, SUM(CASE WHEN response_type = 2 THEN 1 ELSE 0 END) as Recursive, SUM(CASE WHEN response_type = 3 THEN 1 ELSE 0 END) as Cached, SUM(CASE WHEN response_type = 4 THEN 1 ELSE 0 END) as Blocked, SUM(CASE WHEN response_type = 5 THEN 1 ELSE 0 END) as Dropped FROM dns_logs WHERE $__timeFilter(timestamp)", + "format": "table", + "refId": "A" + } + ] + }, + { + "title": "Query Types", + "type": "piechart", + "datasource": { "type": "mysql", "uid": "technitium-mysql" }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 12 }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" } + }, + "overrides": [] + }, + "options": { + "legend": { "displayMode": "table", "placement": "right", "values": ["value", "percent"] }, + "pieType": "donut", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": true }, + "tooltip": { "mode": "single" } + }, + "targets": [ + { + "rawSql": "SELECT SUM(CASE WHEN qtype = 1 THEN 1 ELSE 0 END) as A, SUM(CASE WHEN qtype = 28 THEN 1 ELSE 0 END) as AAAA, SUM(CASE WHEN 
qtype = 5 THEN 1 ELSE 0 END) as CNAME, SUM(CASE WHEN qtype = 15 THEN 1 ELSE 0 END) as MX, SUM(CASE WHEN qtype = 16 THEN 1 ELSE 0 END) as TXT, SUM(CASE WHEN qtype = 33 THEN 1 ELSE 0 END) as SRV, SUM(CASE WHEN qtype = 12 THEN 1 ELSE 0 END) as PTR, SUM(CASE WHEN qtype = 6 THEN 1 ELSE 0 END) as SOA, SUM(CASE WHEN qtype = 2 THEN 1 ELSE 0 END) as NS, SUM(CASE WHEN qtype = 65 THEN 1 ELSE 0 END) as HTTPS, SUM(CASE WHEN qtype NOT IN (1,2,5,6,12,15,16,28,33,65) THEN 1 ELSE 0 END) as Other FROM dns_logs WHERE $__timeFilter(timestamp)", + "format": "table", + "refId": "A" + } + ] + }, + { + "title": "Top 20 Queried Domains", + "type": "table", + "datasource": { "type": "mysql", "uid": "technitium-mysql" }, + "gridPos": { "h": 10, "w": 12, "x": 0, "y": 20 }, + "fieldConfig": { + "defaults": { + "custom": { "filterable": true } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "count" }, "properties": [{ "id": "custom.width", "value": 100 }] } + ] + }, + "options": { + "showHeader": true, + "sortBy": [{ "desc": true, "displayName": "count" }] + }, + "targets": [ + { + "rawSql": "SELECT qname as domain, COUNT(*) as count FROM dns_logs WHERE $__timeFilter(timestamp) GROUP BY qname ORDER BY count DESC LIMIT 20", + "format": "table", + "refId": "A" + } + ] + }, + { + "title": "Top 20 Clients", + "type": "table", + "datasource": { "type": "mysql", "uid": "technitium-mysql" }, + "gridPos": { "h": 10, "w": 12, "x": 12, "y": 20 }, + "fieldConfig": { + "defaults": { + "custom": { "filterable": true } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "count" }, "properties": [{ "id": "custom.width", "value": 100 }] } + ] + }, + "options": { + "showHeader": true, + "sortBy": [{ "desc": true, "displayName": "count" }] + }, + "targets": [ + { + "rawSql": "SELECT client_ip, COUNT(*) as count FROM dns_logs WHERE $__timeFilter(timestamp) GROUP BY client_ip ORDER BY count DESC LIMIT 20", + "format": "table", + "refId": "A" + } + ] + }, + { + "title": "Average 
Response Time Over Time", + "type": "timeseries", + "datasource": { "type": "mysql", "uid": "technitium-mysql" }, + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 30 }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "unit": "ms", + "custom": { + "axisBorderShow": false, + "axisLabel": "Response Time (ms)", + "axisPlacement": "auto", + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "lineWidth": 2, + "pointSize": 5, + "showPoints": "never", + "spanNulls": true + } + }, + "overrides": [] + }, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "list", "placement": "bottom" }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { + "rawSql": "SELECT $__timeGroup(timestamp, $__interval) as time, AVG(response_rtt) as avg_rtt, MAX(response_rtt) as max_rtt FROM dns_logs WHERE $__timeFilter(timestamp) AND response_rtt IS NOT NULL GROUP BY time ORDER BY time", + "format": "time_series", + "refId": "A" + } + ] + }, + { + "title": "Top 20 NxDomain Domains", + "type": "table", + "datasource": { "type": "mysql", "uid": "technitium-mysql" }, + "gridPos": { "h": 10, "w": 12, "x": 0, "y": 38 }, + "fieldConfig": { + "defaults": { + "custom": { "filterable": true } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "count" }, "properties": [{ "id": "custom.width", "value": 100 }] } + ] + }, + "options": { + "showHeader": true, + "sortBy": [{ "desc": true, "displayName": "count" }] + }, + "targets": [ + { + "rawSql": "SELECT qname as domain, COUNT(*) as count FROM dns_logs WHERE $__timeFilter(timestamp) AND rcode = 3 GROUP BY qname ORDER BY count DESC LIMIT 20", + "format": "table", + "refId": "A" + } + ] + }, + { + "title": "Top 20 Blocked Domains", + "type": "table", + "datasource": { "type": "mysql", "uid": "technitium-mysql" }, + "gridPos": { "h": 10, "w": 12, "x": 12, "y": 38 }, + "fieldConfig": { + "defaults": { + "custom": { "filterable": true } + }, + "overrides": [ + { 
"matcher": { "id": "byName", "options": "count" }, "properties": [{ "id": "custom.width", "value": 100 }] } + ] + }, + "options": { + "showHeader": true, + "sortBy": [{ "desc": true, "displayName": "count" }] + }, + "targets": [ + { + "rawSql": "SELECT qname as domain, COUNT(*) as count FROM dns_logs WHERE $__timeFilter(timestamp) AND response_type = 4 GROUP BY qname ORDER BY count DESC LIMIT 20", + "format": "table", + "refId": "A" + } + ] + } + ], + "refresh": "5m", + "schemaVersion": 39, + "tags": ["dns", "technitium", "mysql"], + "templating": { "list": [] }, + "time": { "from": "now-24h", "to": "now" }, + "timepicker": {}, + "timezone": "", + "title": "Technitium DNS", + "uid": "technitium-dns", + "version": 1 +} diff --git a/stacks/technitium/modules/technitium/ha.tf b/stacks/technitium/modules/technitium/ha.tf new file mode 100644 index 00000000..8ad16b95 --- /dev/null +++ b/stacks/technitium/modules/technitium/ha.tf @@ -0,0 +1,278 @@ +# ============================================================================= +# Technitium DNS — High Availability (Primary-Secondary) +# ============================================================================= +# +# Secondary DNS instance replicates zones from primary via AXFR. +# Both pods share the `dns-server=true` label so the DNS LoadBalancer +# in main.tf routes queries to whichever pod is healthy. 
+ +module "nfs_secondary_config" { + source = "../../../../modules/kubernetes/nfs_volume" + name = "technitium-secondary-config" + namespace = kubernetes_namespace.technitium.metadata[0].name + nfs_server = var.nfs_server + nfs_path = "/mnt/main/technitium-secondary" +} + +# Primary-only service for zone transfers (AXFR) and API access +resource "kubernetes_service" "technitium_primary" { + metadata { + name = "technitium-primary" + namespace = kubernetes_namespace.technitium.metadata[0].name + labels = { + "app" = "technitium" + } + } + + spec { + selector = { + app = "technitium" + } + port { + name = "dns-tcp" + port = 53 + protocol = "TCP" + } + port { + name = "dns-udp" + port = 53 + protocol = "UDP" + } + port { + name = "api" + port = 5380 + protocol = "TCP" + } + } +} + +# Secondary DNS deployment — zone-transfer replica +resource "kubernetes_deployment" "technitium_secondary" { + metadata { + name = "technitium-secondary" + namespace = kubernetes_namespace.technitium.metadata[0].name + labels = { + app = "technitium-secondary" + tier = var.tier + } + } + spec { + replicas = 1 + strategy { + type = "RollingUpdate" + rolling_update { + max_unavailable = "0" + max_surge = "1" + } + } + selector { + match_labels = { + app = "technitium-secondary" + } + } + template { + metadata { + labels = { + app = "technitium-secondary" + "dns-server" = "true" + } + } + spec { + affinity { + pod_anti_affinity { + required_during_scheduling_ignored_during_execution { + label_selector { + match_expressions { + key = "dns-server" + operator = "In" + values = ["true"] + } + } + topology_key = "kubernetes.io/hostname" + } + } + } + container { + image = "technitium/dns-server:latest" + name = "technitium" + env { + name = "DNS_SERVER_ADMIN_PASSWORD" + value = var.technitium_password + } + env { + name = "DNS_SERVER_ENABLE_BLOCKING" + value = "true" + } + resources { + requests = { + cpu = "25m" + memory = "512Mi" + } + limits = { + memory = "512Mi" + } + } + port { + 
container_port = 5380 + } + port { + container_port = 53 + } + port { + container_port = 80 + } + liveness_probe { + tcp_socket { + port = 53 + } + initial_delay_seconds = 10 + period_seconds = 10 + } + readiness_probe { + tcp_socket { + port = 53 + } + initial_delay_seconds = 5 + period_seconds = 5 + } + volume_mount { + mount_path = "/etc/dns" + name = "nfs-config" + } + } + volume { + name = "nfs-config" + persistent_volume_claim { + claim_name = module.nfs_secondary_config.claim_name + } + } + dns_config { + option { + name = "ndots" + value = "2" + } + } + } + } + } +} + +# Secondary web service — internal only, used by setup Job +resource "kubernetes_service" "technitium_secondary_web" { + metadata { + name = "technitium-secondary-web" + namespace = kubernetes_namespace.technitium.metadata[0].name + labels = { + "app" = "technitium-secondary" + } + } + + spec { + selector = { + app = "technitium-secondary" + } + port { + name = "api" + port = 5380 + protocol = "TCP" + } + } +} + +# PodDisruptionBudget — keep at least 1 DNS pod running during voluntary disruptions +resource "kubernetes_pod_disruption_budget_v1" "technitium_dns" { + metadata { + name = "technitium-dns" + namespace = kubernetes_namespace.technitium.metadata[0].name + } + spec { + min_available = "1" + selector { + match_labels = { + "dns-server" = "true" + } + } + } +} + +# Setup Job — configures secondary zones via Technitium REST API +resource "kubernetes_job" "technitium_secondary_setup" { + metadata { + name = "technitium-secondary-setup" + namespace = kubernetes_namespace.technitium.metadata[0].name + } + spec { + backoff_limit = 5 + template { + metadata {} + spec { + restart_policy = "OnFailure" + container { + name = "setup" + image = "curlimages/curl:latest" + command = ["/bin/sh", "-c", <<-SCRIPT + set -e + PRIMARY="http://technitium-primary.technitium.svc.cluster.local:5380" + SECONDARY="http://technitium-secondary-web.technitium.svc.cluster.local:5380" + + # Wait for both to be ready 
+ until curl -sf "$PRIMARY/api/user/login?user=$TECH_USER&pass=$TECH_PASS" -o /tmp/p.json; do echo "Waiting for primary..."; sleep 5; done + until curl -sf "$SECONDARY/api/user/login?user=$TECH_USER&pass=$TECH_PASS" -o /tmp/s.json; do echo "Waiting for secondary..."; sleep 5; done + P_TOKEN=$(cat /tmp/p.json | sed -n 's/.*"token":"\([^"]*\)".*/\1/p') + S_TOKEN=$(cat /tmp/s.json | sed -n 's/.*"token":"\([^"]*\)".*/\1/p') + + # Get zones from primary (split JSON into lines so sed can match each zone) + curl -sf "$PRIMARY/api/zones/list?token=$P_TOKEN" | tr ',' '\n' | sed -n 's/.*"name":"\([^"]*\)".*/\1/p' > /tmp/zones.txt + echo "Found zones:"; cat /tmp/zones.txt + + # Enable zone transfers on primary for each zone + while read -r zone; do + echo "Enabling zone transfer for: $zone" + curl -sf "$PRIMARY/api/zones/options/set?token=$P_TOKEN&zone=$zone&zoneTransfer=Allow" || true + done < /tmp/zones.txt + + # Create secondary zones on secondary instance (ignore "already exists" errors) + while read -r zone; do + echo "Creating secondary zone: $zone" + curl -sf "$SECONDARY/api/zones/create?token=$S_TOKEN&zone=$zone&type=Secondary&primaryNameServerAddresses=$PRIMARY_IP" || true + done < /tmp/zones.txt + + # Force resync all secondary zones to pull latest data + while read -r zone; do + echo "Resyncing: $zone" + curl -sf "$SECONDARY/api/zones/resync?token=$S_TOKEN&zone=$zone" || true + done < /tmp/zones.txt + + echo "Secondary zone setup complete" + SCRIPT + ] + env { + name = "TECH_USER" + value = var.technitium_username + } + env { + name = "TECH_PASS" + value = var.technitium_password + } + env { + name = "PRIMARY_IP" + value = kubernetes_service.technitium_primary.spec[0].cluster_ip + } + } + dns_config { + option { + name = "ndots" + value = "2" + } + } + } + } + } + + depends_on = [ + kubernetes_deployment.technitium, + kubernetes_deployment.technitium_secondary, + kubernetes_service.technitium_primary, + kubernetes_service.technitium_secondary_web, + ] +} diff --git 
a/stacks/technitium/modules/technitium/main.tf b/stacks/technitium/modules/technitium/main.tf new file mode 100644 index 00000000..34d90a81 --- /dev/null +++ b/stacks/technitium/modules/technitium/main.tf @@ -0,0 +1,356 @@ +variable "tls_secret_name" {} +variable "tier" { type = string } +variable "homepage_token" {} +variable "technitium_db_password" {} +variable "nfs_server" { type = string } +variable "mysql_host" { type = string } +variable "technitium_username" { type = string } +variable "technitium_password" { + type = string + sensitive = true +} + +resource "kubernetes_namespace" "technitium" { + metadata { + name = "technitium" + labels = { + tier = var.tier + } + # stale cache error when trying to resolve + # labels = { + # "istio-injection" : "enabled" + # } + } +} + +module "tls_secret" { + source = "../../../../modules/kubernetes/setup_tls_secret" + namespace = kubernetes_namespace.technitium.metadata[0].name + tls_secret_name = var.tls_secret_name +} + +# CoreDNS Corefile - manages cluster DNS resolution +# The viktorbarzin.lan block forwards to Technitium via LoadBalancer. +# A template regex in the viktorbarzin.lan block short-circuits junk queries +# caused by ndots:5 search domain expansion (e.g. www.cloudflare.com.viktorbarzin.lan, +# redis.redis.svc.cluster.local.viktorbarzin.lan) by returning NXDOMAIN for any +# query with 2+ labels before .viktorbarzin.lan. Legitimate single-label queries +# (e.g. idrac.viktorbarzin.lan) fall through to Technitium. +resource "kubernetes_config_map" "coredns" { + metadata { + name = "coredns" + namespace = "kube-system" + } + + data = { + Corefile = <<-EOF + .:53 { + #log + errors + health { + lameduck 5s + } + ready + kubernetes cluster.local in-addr.arpa ip6.arpa { + pods insecure + fallthrough in-addr.arpa ip6.arpa + ttl 30 + } + prometheus :9153 + forward . 
8.8.8.8 1.1.1.1 10.0.20.1 + cache { + success 10000 300 6 + denial 10000 300 60 + } + loop + reload + loadbalance + } + viktorbarzin.lan:53 { + #log + errors + template ANY ANY viktorbarzin.lan { + match ".*\..*\.viktorbarzin\.lan\.$" + rcode NXDOMAIN + fallthrough + } + forward . 10.0.20.204 # Technitium LoadBalancer + cache { + success 10000 300 6 + denial 10000 300 60 + } + } + EOF + } +} + +module "nfs_config" { + source = "../../../../modules/kubernetes/nfs_volume" + name = "technitium-config" + namespace = kubernetes_namespace.technitium.metadata[0].name + nfs_server = var.nfs_server + nfs_path = "/mnt/main/technitium" +} + +resource "kubernetes_deployment" "technitium" { + # resource "kubernetes_daemonset" "technitium" { + metadata { + name = "technitium" + namespace = kubernetes_namespace.technitium.metadata[0].name + labels = { + app = "technitium" + tier = var.tier + } + } + spec { + strategy { + type = "RollingUpdate" + rolling_update { + max_unavailable = "0" + max_surge = "1" + } + } + # replicas = 1 + selector { + match_labels = { + app = "technitium" + } + } + template { + metadata { + annotations = { + "diun.enable" = "false" + # "diun.include_tags" = "^\\d+(?:\\.\\d+)?(?:\\.\\d+)?$" + "diun.include_tags" = "latest" + } + labels = { + app = "technitium" + "dns-server" = "true" + } + } + spec { + affinity { + # Prefer nodes running Traefik for network locality + pod_affinity { + preferred_during_scheduling_ignored_during_execution { + weight = 100 + pod_affinity_term { + label_selector { + match_expressions { + key = "app.kubernetes.io/name" + operator = "In" + values = ["traefik"] + } + } + topology_key = "kubernetes.io/hostname" + } + } + } + # Spread DNS pods across nodes for HA + pod_anti_affinity { + required_during_scheduling_ignored_during_execution { + label_selector { + match_expressions { + key = "dns-server" + operator = "In" + values = ["true"] + } + } + topology_key = "kubernetes.io/hostname" + } + } + } + container { + image = 
"technitium/dns-server:latest" + name = "technitium" + resources { + requests = { + cpu = "25m" + memory = "512Mi" + } + limits = { + memory = "512Mi" + } + } + port { + container_port = 5380 + } + port { + container_port = 53 + } + port { + container_port = 80 + } + liveness_probe { + tcp_socket { + port = 53 + } + initial_delay_seconds = 10 + period_seconds = 10 + } + readiness_probe { + tcp_socket { + port = 53 + } + initial_delay_seconds = 5 + period_seconds = 5 + } + volume_mount { + mount_path = "/etc/dns" + name = "nfs-config" + } + volume_mount { + mount_path = "/etc/tls/" + name = "tls-cert" + } + } + volume { + name = "nfs-config" + persistent_volume_claim { + claim_name = module.nfs_config.claim_name + } + } + volume { + name = "tls-cert" + secret { + secret_name = var.tls_secret_name + } + } + dns_config { + option { + name = "ndots" + value = "2" + } + } + } + } + } +} + +resource "kubernetes_service" "technitium-web" { + metadata { + name = "technitium-web" + namespace = kubernetes_namespace.technitium.metadata[0].name + labels = { + "app" = "technitium" + } + # annotations = { + # "metallb.universe.tf/allow-shared-ip" : "shared" + # } + } + + spec { + # type = "LoadBalancer" + # external_traffic_policy = "Cluster" + selector = { + app = "technitium" + } + port { + name = "technitium-dns" + port = "5380" + protocol = "TCP" + } + port { + name = "technitium-doh" + port = "80" + protocol = "TCP" + } + } +} + +resource "kubernetes_service" "technitium-dns" { + metadata { + name = "technitium-dns" + namespace = kubernetes_namespace.technitium.metadata[0].name + labels = { + "app" = "technitium" + } + } + + spec { + type = "LoadBalancer" + port { + name = "technitium-dns" + port = 53 + protocol = "UDP" + } + external_traffic_policy = "Local" + selector = { + "dns-server" = "true" + } + } +} +module "ingress" { + source = "../../../../modules/kubernetes/ingress_factory" + namespace = kubernetes_namespace.technitium.metadata[0].name + name = "technitium" + 
tls_secret_name = var.tls_secret_name + port = 5380 + service_name = "technitium-web" + extra_annotations = { + "gethomepage.dev/enabled" = "true" + "gethomepage.dev/description" = "Internal DNS Server and Recursive Resolver" + "gethomepage.dev/group" = "Infrastructure" + "gethomepage.dev/icon" : "technitium.png" + "gethomepage.dev/name" = "Technitium" + "gethomepage.dev/widget.type" = "technitium" + "gethomepage.dev/widget.url" = "http://technitium-web.technitium.svc.cluster.local:5380" + "gethomepage.dev/widget.key" = var.homepage_token + + "gethomepage.dev/widget.range" = "LastWeek" + "gethomepage.dev/widget.fields" = "[\"totalQueries\", \"totalCached\", \"totalBlocked\", \"totalRecursive\"]" + "gethomepage.dev/pod-selector" = "" + } +} + +module "ingress-doh" { + source = "../../../../modules/kubernetes/ingress_factory" + namespace = kubernetes_namespace.technitium.metadata[0].name + name = "technitium-doh" + tls_secret_name = var.tls_secret_name + host = "dns" + service_name = "technitium-web" +} + +# Grafana datasource for Technitium DNS query logs in MySQL +resource "kubernetes_config_map" "grafana_technitium_datasource" { + metadata { + name = "grafana-technitium-datasource" + namespace = "monitoring" + labels = { + grafana_datasource = "1" + } + } + data = { + "technitium-datasource.yaml" = yamlencode({ + apiVersion = 1 + datasources = [{ + name = "Technitium MySQL" + type = "mysql" + access = "proxy" + url = "${var.mysql_host}:3306" + database = "technitium" + user = "technitium" + uid = "technitium-mysql" + secureJsonData = { + password = var.technitium_db_password + } + }] + }) + } +} + +# Grafana dashboard for Technitium DNS query logs +resource "kubernetes_config_map" "grafana_technitium_dashboard" { + metadata { + name = "grafana-technitium-dashboard" + namespace = "monitoring" + labels = { + grafana_dashboard = "1" + } + } + data = { + "technitium-dns.json" = file("${path.module}/dashboards/technitium-dns.json") + } +} + diff --git 
a/stacks/technitium/secrets b/stacks/technitium/secrets new file mode 120000 index 00000000..ca54a7cf --- /dev/null +++ b/stacks/technitium/secrets @@ -0,0 +1 @@ +../../secrets \ No newline at end of file diff --git a/stacks/technitium/terragrunt.hcl b/stacks/technitium/terragrunt.hcl new file mode 100644 index 00000000..4f16dddf --- /dev/null +++ b/stacks/technitium/terragrunt.hcl @@ -0,0 +1,8 @@ +include "root" { + path = find_in_parent_folders() +} + +dependency "infra" { + config_path = "../infra" + skip_outputs = true +} diff --git a/stacks/technitium/tiers.tf b/stacks/technitium/tiers.tf new file mode 100644 index 00000000..eb0f8083 --- /dev/null +++ b/stacks/technitium/tiers.tf @@ -0,0 +1,10 @@ +# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa +locals { + tiers = { + core = "0-core" + cluster = "1-cluster" + gpu = "2-gpu" + edge = "3-edge" + aux = "4-aux" + } +} diff --git a/stacks/traefik/main.tf b/stacks/traefik/main.tf new file mode 100644 index 00000000..c87cba71 --- /dev/null +++ b/stacks/traefik/main.tf @@ -0,0 +1,16 @@ +variable "tls_secret_name" { type = string } +variable "redis_host" { type = string } + +data "vault_kv_secret_v2" "secrets" { + mount = "secret" + name = "platform" +} + +module "traefik" { + source = "./modules/traefik" + tier = local.tiers.core + crowdsec_api_key = data.vault_kv_secret_v2.secrets.data["ingress_crowdsec_api_key"] + redis_host = var.redis_host + tls_secret_name = var.tls_secret_name + auth_fallback_htpasswd = data.vault_kv_secret_v2.secrets.data["auth_fallback_htpasswd"] +} diff --git a/stacks/traefik/modules/traefik/main.tf b/stacks/traefik/modules/traefik/main.tf new file mode 100644 index 00000000..6428322c --- /dev/null +++ b/stacks/traefik/modules/traefik/main.tf @@ -0,0 +1,629 @@ +variable "tier" { type = string } +variable "crowdsec_api_key" { + type = string + sensitive = true +} +variable "redis_host" { type = string } +variable "tls_secret_name" {} +variable "auth_fallback_htpasswd" { + type = string + 
description = "htpasswd-format string for emergency basicAuth fallback when Authentik is down" + sensitive = true +} + +resource "kubernetes_namespace" "traefik" { + metadata { + name = "traefik" + labels = { + "app.kubernetes.io/name" = "traefik" + "app.kubernetes.io/instance" = "traefik" + tier = var.tier + } + } +} + +resource "helm_release" "traefik" { + namespace = kubernetes_namespace.traefik.metadata[0].name + create_namespace = false + name = "traefik" + repository = "https://traefik.github.io/charts" + chart = "traefik" + atomic = true + timeout = 600 + + values = [yamlencode({ + deployment = { + replicas = 3 + podAnnotations = { + "diun.enable" = "true" + "diun.include_tags" = "^v\\d+(?:\\.\\d+)?(?:\\.\\d+)?.*$" + } + initContainers = [{ + name = "download-plugins" + image = "alpine:3" + command = ["sh", "-c", join("", [ + "set -e; ", + "STORAGE=/plugins-storage; ", + "mkdir -p \"$STORAGE/archives/github.com/maxlerebourg/crowdsec-bouncer-traefik-plugin\"; ", + "mkdir -p \"$STORAGE/archives/github.com/packruler/rewrite-body\"; ", + "wget -q -T 30 -O \"$STORAGE/archives/github.com/maxlerebourg/crowdsec-bouncer-traefik-plugin/v1.4.2.zip\" ", + "\"https://github.com/maxlerebourg/crowdsec-bouncer-traefik-plugin/archive/refs/tags/v1.4.2.zip\"; ", + "wget -q -T 30 -O \"$STORAGE/archives/github.com/packruler/rewrite-body/v1.2.0.zip\" ", + "\"https://github.com/packruler/rewrite-body/archive/refs/tags/v1.2.0.zip\"; ", + "printf '{\"github.com/maxlerebourg/crowdsec-bouncer-traefik-plugin\":\"v1.4.2\",\"github.com/packruler/rewrite-body\":\"v1.2.0\"}' ", + "> \"$STORAGE/archives/state.json\"; ", + "echo \"Plugins pre-downloaded successfully\"", + ])] + volumeMounts = [{ + name = "plugins" + mountPath = "/plugins-storage" + }] + }] + } + + updateStrategy = { + type = "RollingUpdate" + rollingUpdate = { + maxUnavailable = 0 + maxSurge = 1 + } + } + + ingressClass = { + enabled = true + isDefaultClass = true + } + + providers = { + kubernetesIngress = { + enabled = 
true + allowExternalNameServices = true + publishedService = { enabled = true } + } + kubernetesCRD = { + enabled = true + allowExternalNameServices = true + allowCrossNamespace = true + } + } + + # Enable dashboard API (accessible on port 8080 internally) + api = { + insecure = false + } + + # Entrypoints + ports = { + web = { + port = 8000 + exposedPort = 80 + protocol = "TCP" + http = { + redirections = { + entryPoint = { + to = "websecure" + scheme = "https" + } + } + } + } + websecure = { + port = 8443 + exposedPort = 443 + protocol = "TCP" + http = { + tls = { + enabled = true + } + } + http3 = { + enabled = true + advertisedPort = 443 + } + } + whisper-tcp = { + port = 10300 + exposedPort = 10300 + protocol = "TCP" + expose = { default = true } + } + piper-tcp = { + port = 10200 + exposedPort = 10200 + protocol = "TCP" + expose = { default = true } + } + ollama-tcp = { + port = 11434 + exposedPort = 11434 + protocol = "TCP" + expose = { default = true } + } + } + + service = { + type = "LoadBalancer" + annotations = { + "metallb.universe.tf/loadBalancerIPs" = "10.0.20.202" + } + spec = { + externalTrafficPolicy = "Local" + } + } + + # Plugins + experimental = { + plugins = { + crowdsec-bouncer = { + moduleName = "github.com/maxlerebourg/crowdsec-bouncer-traefik-plugin" + version = "v1.4.2" + } + rewrite-body = { + moduleName = "github.com/packruler/rewrite-body" + version = "v1.2.0" + } + } + } + + # Prometheus metrics + metrics = { + prometheus = { + entryPoint = "metrics" + addEntryPointsLabels = true + addServicesLabels = true + addRoutersLabels = true + } + } + + # Access logs + logs = { + access = { + enabled = true + } + } + + additionalArguments = [ + "--global.checknewversion=false", + "--global.sendanonymoususage=false", + # Skip TLS verification for self-signed backend certs (proxmox, idrac, etc.) 
+ "--serversTransport.insecureSkipVerify=true", + # Increase timeouts for services like Immich + "--serversTransport.forwardingTimeouts.dialTimeout=60s", + "--serversTransport.forwardingTimeouts.responseHeaderTimeout=30s", + "--serversTransport.forwardingTimeouts.idleConnTimeout=90s", + # Use forwarded headers from trusted proxies + "--entryPoints.websecure.forwardedHeaders.insecure=false", + "--entryPoints.web.forwardedHeaders.insecure=false", + "--entryPoints.websecure.forwardedHeaders.trustedIPs=173.245.48.0/20,103.21.244.0/22,103.22.200.0/22,103.31.4.0/22,141.101.64.0/18,108.162.192.0/18,190.93.240.0/20,188.114.96.0/20,197.234.240.0/22,198.41.128.0/17,162.158.0.0/15,104.16.0.0/13,104.24.0.0/14,172.64.0.0/13,131.0.72.0/22,10.0.0.0/8,192.168.0.0/16", + "--entryPoints.web.forwardedHeaders.trustedIPs=173.245.48.0/20,103.21.244.0/22,103.22.200.0/22,103.31.4.0/22,141.101.64.0/18,108.162.192.0/18,190.93.240.0/20,188.114.96.0/20,197.234.240.0/22,198.41.128.0/17,162.158.0.0/15,104.16.0.0/13,104.24.0.0/14,172.64.0.0/13,131.0.72.0/22,10.0.0.0/8,192.168.0.0/16", + ] + + resources = { + requests = { + cpu = "100m" + memory = "384Mi" + } + limits = { + memory = "384Mi" + } + } + + nodeSelector = { + "kubernetes.io/os" = "linux" + } + + tolerations = [] + + topologySpreadConstraints = [{ + maxSkew = 1 + topologyKey = "kubernetes.io/hostname" + whenUnsatisfiable = "DoNotSchedule" + labelSelector = { + matchLabels = { + "app.kubernetes.io/name" = "traefik" + } + } + }] + + podDisruptionBudget = { + enabled = true + minAvailable = 2 + } + })] +} + +# Dashboard resources +module "tls_secret" { + source = "../../../../modules/kubernetes/setup_tls_secret" + namespace = kubernetes_namespace.traefik.metadata[0].name + tls_secret_name = var.tls_secret_name +} + +resource "kubernetes_service" "traefik_dashboard" { + metadata { + name = "traefik-dashboard" + namespace = kubernetes_namespace.traefik.metadata[0].name + labels = { + "app" = "traefik-dashboard" + } + } + + spec { + selector 
= { + "app.kubernetes.io/name" = "traefik" + } + port { + name = "http" + port = 8080 + target_port = 8080 + protocol = "TCP" + } + } +} + +module "ingress" { + source = "../../../../modules/kubernetes/ingress_factory" + namespace = kubernetes_namespace.traefik.metadata[0].name + name = "traefik" + service_name = "traefik-dashboard" + host = "traefik" + port = 8080 + tls_secret_name = var.tls_secret_name + protected = true + extra_annotations = { + "gethomepage.dev/enabled" = "true" + "gethomepage.dev/name" = "Traefik" + "gethomepage.dev/description" = "Reverse proxy & ingress" + "gethomepage.dev/icon" = "traefik.png" + "gethomepage.dev/group" = "Core Platform" + "gethomepage.dev/pod-selector" = "" + } +} + +# Bot-block resilience proxy: nginx reverse proxy in front of Poison Fountain +# Returns 200 (allow all traffic) if Poison Fountain is unreachable (fail-open) +resource "kubernetes_config_map" "bot_block_proxy_config" { + metadata { + name = "bot-block-proxy-config" + namespace = kubernetes_namespace.traefik.metadata[0].name + } + + data = { + "default.conf" = <<-EOT + upstream poison_fountain { + server poison-fountain.poison-fountain.svc.cluster.local:8080; + } + server { + listen 8080; + location /auth { + proxy_pass http://poison_fountain; + proxy_connect_timeout 3s; + proxy_read_timeout 5s; + proxy_send_timeout 5s; + proxy_intercept_errors on; + error_page 502 503 504 =200 /fallback-allow; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + location = /fallback-allow { + internal; + return 200 "allowed"; + } + location /healthz { + access_log off; + return 200 "ok"; + } + } + EOT + } +} + +resource "kubernetes_deployment" "bot_block_proxy" { + metadata { + name = "bot-block-proxy" + namespace = kubernetes_namespace.traefik.metadata[0].name + labels = { + app = "bot-block-proxy" + } + } + + spec { + replicas = 2 + strategy 
{ + type = "RollingUpdate" + rolling_update { + max_unavailable = 0 + max_surge = 1 + } + } + selector { + match_labels = { + app = "bot-block-proxy" + } + } + template { + metadata { + labels = { + app = "bot-block-proxy" + } + } + spec { + topology_spread_constraint { + max_skew = 1 + topology_key = "kubernetes.io/hostname" + when_unsatisfiable = "DoNotSchedule" + label_selector { + match_labels = { + app = "bot-block-proxy" + } + } + } + container { + name = "nginx" + image = "nginx:1-alpine" + + port { + container_port = 8080 + } + + volume_mount { + name = "config" + mount_path = "/etc/nginx/conf.d" + read_only = true + } + + liveness_probe { + http_get { + path = "/healthz" + port = 8080 + } + initial_delay_seconds = 3 + period_seconds = 10 + } + readiness_probe { + http_get { + path = "/healthz" + port = 8080 + } + initial_delay_seconds = 2 + period_seconds = 5 + } + + resources { + requests = { + cpu = "5m" + memory = "64Mi" + } + limits = { + memory = "64Mi" + } + } + } + + volume { + name = "config" + config_map { + name = kubernetes_config_map.bot_block_proxy_config.metadata[0].name + } + } + } + } + } +} + +resource "kubernetes_service" "bot_block_proxy" { + metadata { + name = "bot-block-proxy" + namespace = kubernetes_namespace.traefik.metadata[0].name + labels = { + app = "bot-block-proxy" + } + } + + spec { + selector = { + app = "bot-block-proxy" + } + port { + name = "http" + port = 8080 + target_port = 8080 + } + } +} + +# Resilience proxy for Authentik ForwardAuth +# Falls back to basicAuth when Authentik is unreachable +resource "kubernetes_secret" "auth_proxy_htpasswd" { + metadata { + name = "auth-proxy-htpasswd" + namespace = kubernetes_namespace.traefik.metadata[0].name + } + + data = { + "htpasswd" = var.auth_fallback_htpasswd + } +} + +resource "kubernetes_config_map" "auth_proxy_config" { + metadata { + name = "auth-proxy-config" + namespace = kubernetes_namespace.traefik.metadata[0].name + } + + data = { + "default.conf" = <<-EOT + 
upstream authentik { + server ak-outpost-authentik-embedded-outpost.authentik.svc.cluster.local:9000; + } + server { + listen 9000; + + location /outpost.goauthentik.io/auth/traefik { + proxy_pass http://authentik; + proxy_connect_timeout 3s; + proxy_read_timeout 5s; + proxy_send_timeout 5s; + proxy_intercept_errors on; + error_page 502 503 504 = @fallback_auth; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Original-URL $scheme://$http_host$request_uri; + } + + location @fallback_auth { + auth_basic "Emergency Access"; + auth_basic_user_file /etc/nginx/htpasswd; + add_header X-authentik-username $remote_user always; + add_header X-Auth-Fallback "true" always; + return 200; + } + + location /outpost.goauthentik.io/ { + proxy_pass http://authentik; + proxy_connect_timeout 3s; + proxy_read_timeout 10s; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + location /healthz { + access_log off; + return 200 "ok"; + } + } + EOT + } +} + +resource "kubernetes_deployment" "auth_proxy" { + metadata { + name = "auth-proxy" + namespace = kubernetes_namespace.traefik.metadata[0].name + labels = { + app = "auth-proxy" + } + } + + spec { + replicas = 2 + strategy { + type = "RollingUpdate" + rolling_update { + max_unavailable = 0 + max_surge = 1 + } + } + selector { + match_labels = { + app = "auth-proxy" + } + } + template { + metadata { + labels = { + app = "auth-proxy" + } + } + spec { + topology_spread_constraint { + max_skew = 1 + topology_key = "kubernetes.io/hostname" + when_unsatisfiable = "DoNotSchedule" + label_selector { + match_labels = { + app = "auth-proxy" + } + } + } + container { + name = "nginx" + image = "nginx:1-alpine" + + port { + container_port = 9000 + } + + 
volume_mount { + name = "config" + mount_path = "/etc/nginx/conf.d" + read_only = true + } + volume_mount { + name = "htpasswd" + mount_path = "/etc/nginx/htpasswd" + sub_path = "htpasswd" + read_only = true + } + + liveness_probe { + http_get { + path = "/healthz" + port = 9000 + } + initial_delay_seconds = 3 + period_seconds = 10 + } + readiness_probe { + http_get { + path = "/healthz" + port = 9000 + } + initial_delay_seconds = 2 + period_seconds = 5 + } + + resources { + requests = { + cpu = "5m" + memory = "64Mi" + } + limits = { + memory = "64Mi" + } + } + } + + volume { + name = "config" + config_map { + name = kubernetes_config_map.auth_proxy_config.metadata[0].name + } + } + volume { + name = "htpasswd" + secret { + secret_name = kubernetes_secret.auth_proxy_htpasswd.metadata[0].name + } + } + } + } + } +} + +resource "kubernetes_service" "auth_proxy" { + metadata { + name = "auth-proxy" + namespace = kubernetes_namespace.traefik.metadata[0].name + labels = { + app = "auth-proxy" + } + } + + spec { + selector = { + app = "auth-proxy" + } + port { + name = "http" + port = 9000 + target_port = 9000 + } + } +} diff --git a/stacks/traefik/modules/traefik/middleware.tf b/stacks/traefik/modules/traefik/middleware.tf new file mode 100644 index 00000000..6e720e09 --- /dev/null +++ b/stacks/traefik/modules/traefik/middleware.tf @@ -0,0 +1,363 @@ +# Shared Traefik Middleware CRDs +# These are referenced by ingress resources via annotations like: +# "traefik.ingress.kubernetes.io/router.middlewares" = "traefik-rate-limit@kubernetescrd" + +# Rate limiting middleware +resource "kubernetes_manifest" "middleware_rate_limit" { + manifest = { + apiVersion = "traefik.io/v1alpha1" + kind = "Middleware" + metadata = { + name = "rate-limit" + namespace = kubernetes_namespace.traefik.metadata[0].name + } + spec = { + rateLimit = { + average = 10 + burst = 50 + } + } + } + + depends_on = [helm_release.traefik] +} + +# Authentik forward auth middleware +resource 
"kubernetes_manifest" "middleware_authentik_forward_auth" { + manifest = { + apiVersion = "traefik.io/v1alpha1" + kind = "Middleware" + metadata = { + name = "authentik-forward-auth" + namespace = kubernetes_namespace.traefik.metadata[0].name + } + spec = { + forwardAuth = { + address = "http://auth-proxy.traefik.svc.cluster.local:9000/outpost.goauthentik.io/auth/traefik" + trustForwardHeader = true + authResponseHeaders = [ + "X-authentik-username", + "X-authentik-uid", + "X-authentik-email", + "X-authentik-name", + "X-authentik-groups", + "Set-Cookie", + ] + } + } + } + + depends_on = [helm_release.traefik] +} + +# IP allowlist for local-only access +resource "kubernetes_manifest" "middleware_local_only" { + manifest = { + apiVersion = "traefik.io/v1alpha1" + kind = "Middleware" + metadata = { + name = "local-only" + namespace = kubernetes_namespace.traefik.metadata[0].name + } + spec = { + ipAllowList = { + sourceRange = [ + "192.168.1.0/24", + "10.0.0.0/8", + "fc00::/7", + "fe80::/10", + ] + } + } + } + + depends_on = [helm_release.traefik] +} + +# HTTPS redirect middleware +resource "kubernetes_manifest" "middleware_redirect_https" { + manifest = { + apiVersion = "traefik.io/v1alpha1" + kind = "Middleware" + metadata = { + name = "redirect-https" + namespace = kubernetes_namespace.traefik.metadata[0].name + } + spec = { + redirectScheme = { + scheme = "https" + permanent = true + } + } + } + + depends_on = [helm_release.traefik] +} + +# CSP headers middleware (default) +resource "kubernetes_manifest" "middleware_csp_headers" { + manifest = { + apiVersion = "traefik.io/v1alpha1" + kind = "Middleware" + metadata = { + name = "csp-headers" + namespace = kubernetes_namespace.traefik.metadata[0].name + } + spec = { + headers = { + contentSecurityPolicy = "frame-ancestors 'self' *.viktorbarzin.me viktorbarzin.me" + } + } + } + + depends_on = [helm_release.traefik] +} + +# Security headers middleware (HSTS, X-Frame-Options, etc.) 
+resource "kubernetes_manifest" "middleware_security_headers" { + manifest = { + apiVersion = "traefik.io/v1alpha1" + kind = "Middleware" + metadata = { + name = "security-headers" + namespace = kubernetes_namespace.traefik.metadata[0].name + } + spec = { + headers = { + stsSeconds = 31536000 + stsIncludeSubdomains = true + frameDeny = true + contentTypeNosniff = true + browserXssFilter = true + referrerPolicy = "strict-origin-when-cross-origin" + permissionsPolicy = "camera=(), microphone=(), geolocation=()" + } + } + } + + depends_on = [helm_release.traefik] +} + +# CrowdSec bouncer plugin middleware +resource "kubernetes_manifest" "middleware_crowdsec" { + manifest = { + apiVersion = "traefik.io/v1alpha1" + kind = "Middleware" + metadata = { + name = "crowdsec" + namespace = kubernetes_namespace.traefik.metadata[0].name + } + spec = { + plugin = { + crowdsec-bouncer = { + crowdsecLapiKey = var.crowdsec_api_key + crowdsecLapiHost = "crowdsec-service.crowdsec.svc.cluster.local:8080" + crowdsecMode = "stream" + updateMaxFailure = -1 # fail-open: serve from cache when LAPI is unreachable + redisCacheEnabled = true + redisCacheHost = var.redis_host + redisCacheUnreachableBlock = false # don't block traffic if Redis is also unreachable + clientTrustedIPs = ["10.0.20.0/24", "10.10.0.0/16"] # node + pod CIDRs bypass CrowdSec + } + } + } + } + + depends_on = [helm_release.traefik] +} + +# TLS option for mTLS (client certificate auth) +resource "kubernetes_manifest" "tls_option_mtls" { + manifest = { + apiVersion = "traefik.io/v1alpha1" + kind = "TLSOption" + metadata = { + name = "mtls" + namespace = kubernetes_namespace.traefik.metadata[0].name + } + spec = { + clientAuth = { + secretNames = ["ca-secret"] + clientAuthType = "RequireAndVerifyClientCert" + } + } + } + + depends_on = [helm_release.traefik] +} + +# ServersTransport for backends with self-signed certificates +resource "kubernetes_manifest" "servers_transport_insecure" { + manifest = { + apiVersion = 
"traefik.io/v1alpha1" + kind = "ServersTransport" + metadata = { + name = "insecure-skip-verify" + namespace = kubernetes_namespace.traefik.metadata[0].name + } + spec = { + insecureSkipVerify = true + } + } + + depends_on = [helm_release.traefik] +} + +# Strip Authentik auth headers/cookies before forwarding to backend +# Useful for backends (iDRAC, TP-Link) that break when receiving extra headers +resource "kubernetes_manifest" "middleware_strip_auth_headers" { + manifest = { + apiVersion = "traefik.io/v1alpha1" + kind = "Middleware" + metadata = { + name = "strip-auth-headers" + namespace = kubernetes_namespace.traefik.metadata[0].name + } + spec = { + headers = { + customRequestHeaders = { + "X-authentik-username" = "" + "X-authentik-uid" = "" + "X-authentik-email" = "" + "X-authentik-name" = "" + "X-authentik-groups" = "" + } + } + } + } + + depends_on = [helm_release.traefik] +} + +# Immich-specific rate limit (higher limits for photo uploads) +resource "kubernetes_manifest" "middleware_immich_rate_limit" { + manifest = { + apiVersion = "traefik.io/v1alpha1" + kind = "Middleware" + metadata = { + name = "immich-rate-limit" + namespace = kubernetes_namespace.traefik.metadata[0].name + } + spec = { + rateLimit = { + average = 100 + burst = 1000 + } + } + } + + depends_on = [helm_release.traefik] +} + +# Strip Accept-Encoding header so backends send uncompressed responses. +# Used alongside rewrite-body plugin (rybbit analytics) which fails to +# decompress certain gzip responses (flate: corrupt input before offset 5). +# Also used by anti-AI trap links rewrite-body middleware. 
+resource "kubernetes_manifest" "middleware_strip_accept_encoding" { + manifest = { + apiVersion = "traefik.io/v1alpha1" + kind = "Middleware" + metadata = { + name = "strip-accept-encoding" + namespace = kubernetes_namespace.traefik.metadata[0].name + } + spec = { + headers = { + customRequestHeaders = { + "Accept-Encoding" = "" + } + } + } + } + + depends_on = [helm_release.traefik] +} + +# ForwardAuth middleware to block known AI bot User-Agents +resource "kubernetes_manifest" "middleware_ai_bot_block" { + manifest = { + apiVersion = "traefik.io/v1alpha1" + kind = "Middleware" + metadata = { + name = "ai-bot-block" + namespace = kubernetes_namespace.traefik.metadata[0].name + } + spec = { + forwardAuth = { + address = "http://bot-block-proxy.traefik.svc.cluster.local:8080/auth" + trustForwardHeader = true + } + } + } + + depends_on = [helm_release.traefik] +} + +# X-Robots-Tag header to discourage compliant AI crawlers +resource "kubernetes_manifest" "middleware_anti_ai_headers" { + manifest = { + apiVersion = "traefik.io/v1alpha1" + kind = "Middleware" + metadata = { + name = "anti-ai-headers" + namespace = kubernetes_namespace.traefik.metadata[0].name + } + spec = { + headers = { + customResponseHeaders = { + "X-Robots-Tag" = "noai, noimageai" + } + } + } + } + + depends_on = [helm_release.traefik] +} + +# Inject hidden trap links before to catch AI scrapers +# Links are CSS-hidden and aria-hidden so humans never see them +resource "kubernetes_manifest" "middleware_anti_ai_trap_links" { + manifest = { + apiVersion = "traefik.io/v1alpha1" + kind = "Middleware" + metadata = { + name = "anti-ai-trap-links" + namespace = kubernetes_namespace.traefik.metadata[0].name + } + spec = { + plugin = { + rewrite-body = { + rewrites = [{ + regex = "" + replacement = "" + }] + monitoring = { + types = ["text/html"] + } + } + } + } + } + + depends_on = [helm_release.traefik] +} + +# Retry middleware for transient backend failures (502/503 during restarts) +resource 
"kubernetes_manifest" "middleware_retry" { + manifest = { + apiVersion = "traefik.io/v1alpha1" + kind = "Middleware" + metadata = { + name = "retry" + namespace = kubernetes_namespace.traefik.metadata[0].name + } + spec = { + retry = { + attempts = 2 + initialInterval = "100ms" + } + } + } + + depends_on = [helm_release.traefik] +} diff --git a/stacks/traefik/secrets b/stacks/traefik/secrets new file mode 120000 index 00000000..ca54a7cf --- /dev/null +++ b/stacks/traefik/secrets @@ -0,0 +1 @@ +../../secrets \ No newline at end of file diff --git a/stacks/traefik/terragrunt.hcl b/stacks/traefik/terragrunt.hcl new file mode 100644 index 00000000..4f16dddf --- /dev/null +++ b/stacks/traefik/terragrunt.hcl @@ -0,0 +1,8 @@ +include "root" { + path = find_in_parent_folders() +} + +dependency "infra" { + config_path = "../infra" + skip_outputs = true +} diff --git a/stacks/traefik/tiers.tf b/stacks/traefik/tiers.tf new file mode 100644 index 00000000..eb0f8083 --- /dev/null +++ b/stacks/traefik/tiers.tf @@ -0,0 +1,10 @@ +# Generated by Terragrunt. 
Sig: nIlQXj57tbuaRZEa +locals { + tiers = { + core = "0-core" + cluster = "1-cluster" + gpu = "2-gpu" + edge = "3-edge" + aux = "4-aux" + } +} diff --git a/stacks/uptime-kuma/main.tf b/stacks/uptime-kuma/main.tf new file mode 100644 index 00000000..d6b61227 --- /dev/null +++ b/stacks/uptime-kuma/main.tf @@ -0,0 +1,9 @@ +variable "tls_secret_name" { type = string } +variable "nfs_server" { type = string } + +module "uptime-kuma" { + source = "./modules/uptime-kuma" + tls_secret_name = var.tls_secret_name + nfs_server = var.nfs_server + tier = local.tiers.cluster +} diff --git a/stacks/uptime-kuma/modules/uptime-kuma/main.tf b/stacks/uptime-kuma/modules/uptime-kuma/main.tf new file mode 100644 index 00000000..7215015f --- /dev/null +++ b/stacks/uptime-kuma/modules/uptime-kuma/main.tf @@ -0,0 +1,216 @@ +variable "tls_secret_name" {} +variable "tier" { type = string } +variable "nfs_server" { type = string } + +resource "kubernetes_namespace" "uptime-kuma" { + metadata { + name = "uptime-kuma" + labels = { + tier = var.tier + } + # labels = { + # "istio-injection" : "enabled" + # } + } +} + +module "tls_secret" { + source = "../../../../modules/kubernetes/setup_tls_secret" + namespace = kubernetes_namespace.uptime-kuma.metadata[0].name + tls_secret_name = var.tls_secret_name +} + +module "nfs_data" { + source = "../../../../modules/kubernetes/nfs_volume" + name = "uptime-kuma-data" + namespace = kubernetes_namespace.uptime-kuma.metadata[0].name + nfs_server = var.nfs_server + nfs_path = "/mnt/main/uptime-kuma" +} + +resource "kubernetes_deployment" "uptime-kuma" { + metadata { + name = "uptime-kuma" + namespace = kubernetes_namespace.uptime-kuma.metadata[0].name + labels = { + app = "uptime-kuma" + tier = var.tier + } + annotations = { + "reloader.stakater.com/search" = "true" + } + } + spec { + replicas = 1 + strategy { + type = "Recreate" + } + selector { + match_labels = { + app = "uptime-kuma" + } + } + template { + metadata { + annotations = { + "diun.enable" = 
"true" + "diun.include_tags" = "latest" + } + labels = { + app = "uptime-kuma" + } + } + spec { + container { + image = "louislam/uptime-kuma:2" + name = "uptime-kuma" + + resources { + requests = { + cpu = "50m" + memory = "64Mi" + } + limits = { + memory = "512Mi" + } + } + + port { + container_port = 3001 + } + liveness_probe { + http_get { + path = "/" + port = 3001 + } + initial_delay_seconds = 15 + period_seconds = 30 + timeout_seconds = 5 + failure_threshold = 5 + } + readiness_probe { + http_get { + path = "/" + port = 3001 + } + initial_delay_seconds = 5 + period_seconds = 30 + timeout_seconds = 5 + failure_threshold = 3 + } + volume_mount { + name = "data" + mount_path = "/app/data" + } + } + volume { + name = "data" + persistent_volume_claim { + claim_name = module.nfs_data.claim_name + } + } + dns_config { + option { + name = "ndots" + value = "2" + } + } + } + } + } +} +resource "kubernetes_service" "uptime-kuma" { + metadata { + name = "uptime-kuma" + namespace = kubernetes_namespace.uptime-kuma.metadata[0].name + labels = { + "app" = "uptime-kuma" + } + } + + spec { + selector = { + app = "uptime-kuma" + } + port { + port = "80" + target_port = "3001" + } + } +} +module "ingress" { + source = "../../../../modules/kubernetes/ingress_factory" + namespace = kubernetes_namespace.uptime-kuma.metadata[0].name + name = "uptime" + tls_secret_name = var.tls_secret_name + service_name = "uptime-kuma" + extra_annotations = { + "gethomepage.dev/enabled" = "true" + "gethomepage.dev/description" = "Uptime monitor" + "gethomepage.dev/group" = "Core Platform" + "gethomepage.dev/icon" : "uptime-kuma.png" + "gethomepage.dev/name" = "Uptime Kuma" + "gethomepage.dev/pod-selector" = "" + "gethomepage.dev/widget.type" = "uptimekuma" + "gethomepage.dev/widget.url" = "http://uptime-kuma.uptime-kuma.svc.cluster.local" + "gethomepage.dev/widget.slug" = "infra" + } + rybbit_site_id = "8fef77b1f7fe" +} + +# CronJob for daily SQLite backups # no longer needed as we're using the 
mysql +# resource "kubernetes_cron_job_v1" "sqlite-backup" { +# metadata { +# name = "backup" +# namespace = kubernetes_namespace.uptime-kuma.metadata[0].name +# } +# spec { +# concurrency_policy = "Replace" +# failed_jobs_history_limit = 5 +# schedule = "0 0 * * *" +# # schedule = "* * * * *" +# starting_deadline_seconds = 10 +# successful_jobs_history_limit = 3 +# job_template { +# metadata {} +# spec { +# active_deadline_seconds = 600 # should finish in 10 minutes +# backoff_limit = 3 +# ttl_seconds_after_finished = 10 +# template { +# metadata {} +# spec { +# container { +# name = "backup" +# image = "alpine/sqlite:latest" +# command = ["/bin/sh", "-c", <<-EOT +# set -e +# export now=$(date +"%Y_%m_%d_%H_%M") +# echo "Backing up SQLite database to /app/data/backup/backup_$now.sqlite" +# sqlite3 /app/data/kuma.db ".backup /app/data/backup/backup_$now.sqlite" +# echo "Backup completed. Deleting old backups..." + +# # Rotate - delete last log file +# cd /app/data/backup +# find . -name "*.sqlite" -type f -mtime +7 -delete # 7 day retention of backups +# echo "Old backups deleted." 
+# EOT +# ] +# volume_mount { +# name = "data" +# mount_path = "/app/data" +# } +# } +# volume { +# name = "data" +# nfs { +# server = var.nfs_server +# path = "/mnt/main/uptime-kuma" +# } +# } +# } +# } +# } +# } +# } +# } diff --git a/stacks/uptime-kuma/secrets b/stacks/uptime-kuma/secrets new file mode 120000 index 00000000..ca54a7cf --- /dev/null +++ b/stacks/uptime-kuma/secrets @@ -0,0 +1 @@ +../../secrets \ No newline at end of file diff --git a/stacks/uptime-kuma/terragrunt.hcl b/stacks/uptime-kuma/terragrunt.hcl new file mode 100644 index 00000000..4f16dddf --- /dev/null +++ b/stacks/uptime-kuma/terragrunt.hcl @@ -0,0 +1,8 @@ +include "root" { + path = find_in_parent_folders() +} + +dependency "infra" { + config_path = "../infra" + skip_outputs = true +} diff --git a/stacks/uptime-kuma/tiers.tf b/stacks/uptime-kuma/tiers.tf new file mode 100644 index 00000000..eb0f8083 --- /dev/null +++ b/stacks/uptime-kuma/tiers.tf @@ -0,0 +1,10 @@ +# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa +locals { + tiers = { + core = "0-core" + cluster = "1-cluster" + gpu = "2-gpu" + edge = "3-edge" + aux = "4-aux" + } +} diff --git a/stacks/vaultwarden/main.tf b/stacks/vaultwarden/main.tf new file mode 100644 index 00000000..2cafca09 --- /dev/null +++ b/stacks/vaultwarden/main.tf @@ -0,0 +1,17 @@ +variable "tls_secret_name" { type = string } +variable "nfs_server" { type = string } +variable "mail_host" { type = string } + +data "vault_kv_secret_v2" "secrets" { + mount = "secret" + name = "platform" +} + +module "vaultwarden" { + source = "./modules/vaultwarden" + tls_secret_name = var.tls_secret_name + mail_host = var.mail_host + smtp_password = data.vault_kv_secret_v2.secrets.data["vaultwarden_smtp_password"] + tier = local.tiers.edge + nfs_server = var.nfs_server +} diff --git a/stacks/vaultwarden/modules/vaultwarden/main.tf b/stacks/vaultwarden/modules/vaultwarden/main.tf new file mode 100644 index 00000000..cc4a9723 --- /dev/null +++ 
b/stacks/vaultwarden/modules/vaultwarden/main.tf @@ -0,0 +1,297 @@ +variable "tls_secret_name" {} +variable "tier" { type = string } +variable "smtp_password" {} +variable "mail_host" { type = string } +variable "nfs_server" { type = string } + +resource "kubernetes_namespace" "vaultwarden" { + metadata { + name = "vaultwarden" + labels = { + "istio-injection" : "disabled" + tier = var.tier + } + } +} + +module "tls_secret" { + source = "../../../../modules/kubernetes/setup_tls_secret" + namespace = kubernetes_namespace.vaultwarden.metadata[0].name + tls_secret_name = var.tls_secret_name +} + +resource "kubernetes_persistent_volume_claim" "vaultwarden_data" { + metadata { + name = "vaultwarden-data-iscsi" + namespace = kubernetes_namespace.vaultwarden.metadata[0].name + } + spec { + access_modes = ["ReadWriteOnce"] + storage_class_name = "iscsi-truenas" + resources { + requests = { + storage = "1Gi" + } + } + } +} + +resource "kubernetes_deployment" "vaultwarden" { + metadata { + name = "vaultwarden" + namespace = kubernetes_namespace.vaultwarden.metadata[0].name + labels = { + app = "vaultwarden" + tier = var.tier + } + annotations = { + "reloader.stakater.com/search" = "true" + } + } + spec { + replicas = 1 + strategy { + type = "Recreate" + } + selector { + match_labels = { + app = "vaultwarden" + } + } + template { + metadata { + annotations = { + "diun.enable" = "true" + "diun.include_tags" = "^\\d+(?:\\.\\d+)?(?:\\.\\d+)?$" + } + labels = { + "app" = "vaultwarden" + } + } + spec { + container { + image = "vaultwarden/server:1.35.4" + name = "vaultwarden" + + resources { + requests = { + cpu = "10m" + memory = "256Mi" + } + limits = { + memory = "256Mi" + } + } + + env { + name = "DOMAIN" + value = "https://vaultwarden.viktorbarzin.me" + } + # env { + # name = "ADMIN_TOKEN" + # value = "" + # } + env { + name = "SMTP_HOST" + value = var.mail_host + } + env { + name = "SMTP_FROM" + value = "vaultwarden@viktorbarzin.me" + } + env { + name = "SMTP_PORT" + value = 
"587" + } + env { + name = "SMTP_SECURITY" + value = "starttls" + } + env { + name = "SMTP_USERNAME" + value = "vaultwarden@viktorbarzin.me" + } + env { + name = "SMTP_PASSWORD" + value = var.smtp_password + } + + port { + container_port = 80 + } + liveness_probe { + http_get { + path = "/alive" + port = 80 + } + initial_delay_seconds = 15 + period_seconds = 30 + timeout_seconds = 5 + failure_threshold = 5 + } + readiness_probe { + http_get { + path = "/alive" + port = 80 + } + initial_delay_seconds = 5 + period_seconds = 30 + timeout_seconds = 5 + failure_threshold = 3 + } + volume_mount { + name = "data" + mount_path = "/data" + } + } + volume { + name = "data" + persistent_volume_claim { + claim_name = kubernetes_persistent_volume_claim.vaultwarden_data.metadata[0].name + } + } + dns_config { + option { + name = "ndots" + value = "2" + } + } + } + } + } +} + +resource "kubernetes_service" "vaultwarden" { + metadata { + name = "vaultwarden" + namespace = kubernetes_namespace.vaultwarden.metadata[0].name + labels = { + "app" = "vaultwarden" + } + } + + spec { + selector = { + app = "vaultwarden" + } + port { + name = "http" + port = "80" + protocol = "TCP" + } + } +} + +module "ingress" { + source = "../../../../modules/kubernetes/ingress_factory" + namespace = kubernetes_namespace.vaultwarden.metadata[0].name + name = "vaultwarden" + tls_secret_name = var.tls_secret_name + rybbit_site_id = "b8fc85e18683" + extra_annotations = { + "gethomepage.dev/enabled" = "true" + "gethomepage.dev/name" = "Vaultwarden" + "gethomepage.dev/description" = "Password manager" + "gethomepage.dev/icon" = "vaultwarden.png" + "gethomepage.dev/group" = "Other" + "gethomepage.dev/pod-selector" = "" + } +} + +# ----------------------------------------------------------------------------- +# Backup — Daily SQLite + data files to NFS +# ----------------------------------------------------------------------------- + +module "nfs_vaultwarden_backup" { + source = 
"../../../../modules/kubernetes/nfs_volume" + name = "vaultwarden-backup" + namespace = kubernetes_namespace.vaultwarden.metadata[0].name + nfs_server = var.nfs_server + nfs_path = "/mnt/main/vaultwarden-backup" +} + +resource "kubernetes_cron_job_v1" "vaultwarden-backup" { + metadata { + name = "vaultwarden-backup" + namespace = kubernetes_namespace.vaultwarden.metadata[0].name + } + spec { + concurrency_policy = "Replace" + failed_jobs_history_limit = 5 + schedule = "0 0 * * *" + starting_deadline_seconds = 10 + successful_jobs_history_limit = 10 + job_template { + metadata {} + spec { + backoff_limit = 3 + ttl_seconds_after_finished = 10 + template { + metadata {} + spec { + affinity { + pod_affinity { + required_during_scheduling_ignored_during_execution { + label_selector { + match_labels = { + app = "vaultwarden" + } + } + topology_key = "kubernetes.io/hostname" + } + } + } + container { + name = "vaultwarden-backup" + image = "docker.io/library/alpine" + command = ["/bin/sh", "-c", <<-EOT + set -euxo pipefail + apk add --no-cache sqlite + now=$(date +"%Y_%m_%d_%H_%M") + mkdir -p /backup/$now + # Safe SQLite backup (handles WAL/locks) + sqlite3 /data/db.sqlite3 ".backup /backup/$now/db.sqlite3" + # Copy RSA keys, attachments, sends, config + cp -a /data/rsa_key.pem /data/rsa_key.pub.pem /backup/$now/ 2>/dev/null || true + cp -a /data/attachments /backup/$now/ 2>/dev/null || true + cp -a /data/sends /backup/$now/ 2>/dev/null || true + cp -a /data/config.json /backup/$now/ 2>/dev/null || true + # Rotate — 30 day retention + find /backup -maxdepth 1 -mindepth 1 -type d -mtime +30 -exec rm -rf {} + + echo "Backup complete: $now" + EOT + ] + volume_mount { + name = "data" + mount_path = "/data" + read_only = true + } + volume_mount { + name = "backup" + mount_path = "/backup" + } + } + volume { + name = "data" + persistent_volume_claim { + claim_name = kubernetes_persistent_volume_claim.vaultwarden_data.metadata[0].name + } + } + volume { + name = "backup" + 
persistent_volume_claim { + claim_name = module.nfs_vaultwarden_backup.claim_name + } + } + dns_config { + option { + name = "ndots" + value = "2" + } + } + } + } + } + } + } +} diff --git a/stacks/vaultwarden/secrets b/stacks/vaultwarden/secrets new file mode 120000 index 00000000..ca54a7cf --- /dev/null +++ b/stacks/vaultwarden/secrets @@ -0,0 +1 @@ +../../secrets \ No newline at end of file diff --git a/stacks/vaultwarden/terragrunt.hcl b/stacks/vaultwarden/terragrunt.hcl new file mode 100644 index 00000000..4f16dddf --- /dev/null +++ b/stacks/vaultwarden/terragrunt.hcl @@ -0,0 +1,8 @@ +include "root" { + path = find_in_parent_folders() +} + +dependency "infra" { + config_path = "../infra" + skip_outputs = true +} diff --git a/stacks/vaultwarden/tiers.tf b/stacks/vaultwarden/tiers.tf new file mode 100644 index 00000000..eb0f8083 --- /dev/null +++ b/stacks/vaultwarden/tiers.tf @@ -0,0 +1,10 @@ +# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa +locals { + tiers = { + core = "0-core" + cluster = "1-cluster" + gpu = "2-gpu" + edge = "3-edge" + aux = "4-aux" + } +} diff --git a/stacks/vpa/main.tf b/stacks/vpa/main.tf new file mode 100644 index 00000000..b4abe0a5 --- /dev/null +++ b/stacks/vpa/main.tf @@ -0,0 +1,7 @@ +variable "tls_secret_name" { type = string } + +module "vpa" { + source = "./modules/vpa" + tls_secret_name = var.tls_secret_name + tier = local.tiers.cluster +} diff --git a/stacks/vpa/modules/vpa/main.tf b/stacks/vpa/modules/vpa/main.tf new file mode 100644 index 00000000..f1d7d4e4 --- /dev/null +++ b/stacks/vpa/modules/vpa/main.tf @@ -0,0 +1,173 @@ +variable "tls_secret_name" { + type = string + sensitive = true +} +variable "tier" { type = string } + +resource "kubernetes_namespace" "vpa" { + metadata { + name = "vpa" + labels = { + tier = var.tier + } + } +} + +module "tls_secret" { + source = "../../../../modules/kubernetes/setup_tls_secret" + namespace = kubernetes_namespace.vpa.metadata[0].name + tls_secret_name = var.tls_secret_name +} + +# 
----------------------------------------------------------------------------- +# VPA — Vertical Pod Autoscaler (Fairwinds Helm chart) +# ----------------------------------------------------------------------------- +resource "helm_release" "vpa" { + namespace = kubernetes_namespace.vpa.metadata[0].name + create_namespace = false + name = "vpa" + atomic = true + + repository = "https://charts.fairwinds.com/stable" + chart = "vpa" + + values = [yamlencode({ + recommender = { + enabled = true + resources = { + requests = { + cpu = "50m" + memory = "200Mi" + } + limits = { + memory = "200Mi" + } + } + } + updater = { + enabled = true + resources = { + requests = { + cpu = "50m" + memory = "200Mi" + } + limits = { + memory = "200Mi" + } + } + } + admissionController = { + enabled = true + resources = { + requests = { + cpu = "50m" + memory = "200Mi" + } + limits = { + memory = "200Mi" + } + } + } + })] +} + +# ----------------------------------------------------------------------------- +# Goldilocks — VPA dashboard (Fairwinds Helm chart) +# ----------------------------------------------------------------------------- +resource "helm_release" "goldilocks" { + namespace = kubernetes_namespace.vpa.metadata[0].name + create_namespace = false + name = "goldilocks" + atomic = true + + repository = "https://charts.fairwinds.com/stable" + chart = "goldilocks" + + values = [yamlencode({ + controller = { + flags = { + on-by-default = "true" + } + } + dashboard = { + replicaCount = 1 + flags = { + on-by-default = "true" + } + } + })] + + depends_on = [helm_release.vpa] +} + +# ----------------------------------------------------------------------------- +# Ingress — Goldilocks dashboard at goldilocks.viktorbarzin.me +# ----------------------------------------------------------------------------- +module "ingress" { + source = "../../../../modules/kubernetes/ingress_factory" + namespace = kubernetes_namespace.vpa.metadata[0].name + name = "goldilocks" + service_name = 
"goldilocks-dashboard" + port = 80 + tls_secret_name = var.tls_secret_name + protected = true + extra_annotations = { + "gethomepage.dev/enabled" = "true" + "gethomepage.dev/name" = "Goldilocks" + "gethomepage.dev/description" = "Resource recommendations" + "gethomepage.dev/icon" = "mdi-scale-balance" + "gethomepage.dev/group" = "Core Platform" + "gethomepage.dev/pod-selector" = "" + } + + depends_on = [helm_release.goldilocks] +} + +# ----------------------------------------------------------------------------- +# Kyverno policy — label namespaces for VPA observe-only mode +# ----------------------------------------------------------------------------- +# Goldilocks reads the goldilocks.fairwinds.com/vpa-update-mode label on +# namespaces to decide the updateMode for VPA objects it creates. +# All namespaces get "off" — Terraform is the authoritative source of truth +# for container resources. Goldilocks provides recommendations only. + +resource "kubernetes_manifest" "vpa_auto_mode_label" { + manifest = { + apiVersion = "kyverno.io/v1" + kind = "ClusterPolicy" + metadata = { + name = "goldilocks-vpa-auto-mode" + annotations = { + "policies.kyverno.io/title" = "Goldilocks VPA Observe-Only Mode" + "policies.kyverno.io/description" = "Sets VPA update mode to off for all namespaces. Terraform owns container resources; Goldilocks provides recommendations only." 
+ } + } + spec = { + rules = [ + { + name = "label-vpa-off-all" + match = { + any = [ + { + resources = { + kinds = ["Namespace"] + } + } + ] + } + mutate = { + patchStrategicMerge = { + metadata = { + labels = { + "goldilocks.fairwinds.com/vpa-update-mode" = "off" + } + } + } + } + }, + ] + } + } + + depends_on = [helm_release.goldilocks] +} diff --git a/stacks/vpa/secrets b/stacks/vpa/secrets new file mode 120000 index 00000000..ca54a7cf --- /dev/null +++ b/stacks/vpa/secrets @@ -0,0 +1 @@ +../../secrets \ No newline at end of file diff --git a/stacks/vpa/terragrunt.hcl b/stacks/vpa/terragrunt.hcl new file mode 100644 index 00000000..4f16dddf --- /dev/null +++ b/stacks/vpa/terragrunt.hcl @@ -0,0 +1,8 @@ +include "root" { + path = find_in_parent_folders() +} + +dependency "infra" { + config_path = "../infra" + skip_outputs = true +} diff --git a/stacks/vpa/tiers.tf b/stacks/vpa/tiers.tf new file mode 100644 index 00000000..eb0f8083 --- /dev/null +++ b/stacks/vpa/tiers.tf @@ -0,0 +1,10 @@ +# Generated by Terragrunt. 
Sig: nIlQXj57tbuaRZEa +locals { + tiers = { + core = "0-core" + cluster = "1-cluster" + gpu = "2-gpu" + edge = "3-edge" + aux = "4-aux" + } +} diff --git a/stacks/wireguard/main.tf b/stacks/wireguard/main.tf new file mode 100644 index 00000000..91783fe0 --- /dev/null +++ b/stacks/wireguard/main.tf @@ -0,0 +1,15 @@ +variable "tls_secret_name" { type = string } + +data "vault_kv_secret_v2" "secrets" { + mount = "secret" + name = "platform" +} + +module "wireguard" { + source = "./modules/wireguard" + tls_secret_name = var.tls_secret_name + wg_0_conf = data.vault_kv_secret_v2.secrets.data["wireguard_wg_0_conf"] + wg_0_key = data.vault_kv_secret_v2.secrets.data["wireguard_wg_0_key"] + firewall_sh = data.vault_kv_secret_v2.secrets.data["wireguard_firewall_sh"] + tier = local.tiers.core +} diff --git a/stacks/wireguard/modules/wireguard/extra/clients.conf b/stacks/wireguard/modules/wireguard/extra/clients.conf new file mode 100644 index 00000000..3fd014dd --- /dev/null +++ b/stacks/wireguard/modules/wireguard/extra/clients.conf @@ -0,0 +1,4 @@ +[Peer] +# friendly_name = anca +PublicKey = fr4DB6FHhxYyzrtnoNbhdT8Fqwvsz7QkhTnZpSQmBCY= +AllowedIPs = 10.3.3.13/32 \ No newline at end of file diff --git a/stacks/wireguard/modules/wireguard/extra/last_ip.txt b/stacks/wireguard/modules/wireguard/extra/last_ip.txt new file mode 100644 index 00000000..2b188609 --- /dev/null +++ b/stacks/wireguard/modules/wireguard/extra/last_ip.txt @@ -0,0 +1 @@ +# DO NOT MANUALLY EDIT THIS LINE. 
Last IP: 10.3.3.15/24 diff --git a/stacks/wireguard/modules/wireguard/main.tf b/stacks/wireguard/modules/wireguard/main.tf new file mode 100644 index 00000000..a80b514b --- /dev/null +++ b/stacks/wireguard/modules/wireguard/main.tf @@ -0,0 +1,251 @@ +variable "tls_secret_name" {} +variable "tier" { type = string } +variable "wg_0_conf" {} +variable "firewall_sh" {} +variable "wg_0_key" {} + +module "tls_secret" { + source = "../../../../modules/kubernetes/setup_tls_secret" + namespace = kubernetes_namespace.wireguard.metadata[0].name + tls_secret_name = var.tls_secret_name +} + +resource "kubernetes_namespace" "wireguard" { + metadata { + name = "wireguard" + labels = { + tier = var.tier + } + } +} +resource "kubernetes_config_map" "wg_0_conf" { + metadata { + name = "wg0-conf" + namespace = kubernetes_namespace.wireguard.metadata[0].name + + labels = { + app = "wireguard" + } + annotations = { + "reloader.stakater.com/match" = "true" + } + } + + data = { + "setup-firewall.sh" = var.firewall_sh + "wg0.conf" = format("%s%s", var.wg_0_conf, file("${path.module}/extra/clients.conf")) + } +} + +resource "kubernetes_secret" "wg_0_key" { + metadata { + name = "wg0-key" + namespace = kubernetes_namespace.wireguard.metadata[0].name + + annotations = { + "reloader.stakater.com/match" = "true" + } + } + data = { + "wg0.key" = var.wg_0_key + # If thep rivate key changes the pub key must be updated manually + "wg-ui-config" = format("{\"PrivateKey\": \"%s\",\"PublicKey\": \"%s\",\"Users\": {}}", var.wg_0_key, "3OeDa6Z3Z6vPVxn/WKJujYL7DoDYPPpI5W+2glUYLHU=") + } + type = "generic" +} + + +resource "kubernetes_deployment" "wireguard" { + metadata { + name = "wireguard" + namespace = kubernetes_namespace.wireguard.metadata[0].name + labels = { + app = "wireguard" + tier = var.tier + } + annotations = { + "reloader.stakater.com/search" = "true" + } + } + spec { + replicas = 1 + strategy { + rolling_update { + max_surge = "2" + max_unavailable = "0" + } + } + selector { + 
match_labels = { + app = "wireguard" + } + } + template { + metadata { + labels = { + app = "wireguard" + } + annotations = { + "prometheus.io/scrape" = "true" + "prometheus.io/port" = "9586" + } + } + spec { + init_container { + name = "sysctl-setup" + image = "busybox" + command = ["/bin/sh", "-c", "echo 1 > /proc/sys/net/ipv4/ip_forward"] + + security_context { + privileged = true + } + } + container { + image = "sclevine/wg:latest" + name = "wireguard" + image_pull_policy = "IfNotPresent" + lifecycle { + post_start { + exec { + command = ["wg-quick", "up", "wg0"] + } + } + pre_stop { + exec { + command = ["wg-quick", "down", "wg0"] + } + } + } + command = ["tail", "-f", "/dev/null"] + port { + container_port = 51820 + protocol = "UDP" + } + volume_mount { + name = "wg0-key" + mount_path = "/etc/wireguard/wg0.key" + sub_path = "wg0.key" + } + volume_mount { + name = "wg0-conf" + mount_path = "/etc/wireguard/wg0.conf" + sub_path = "wg0.conf" + } + volume_mount { + name = "wg0-conf" + mount_path = "/etc/wireguard/setup-firewall.sh" + sub_path = "setup-firewall.sh" + } + security_context { + capabilities { + add = ["NET_ADMIN", "SYS_MODULE"] + } + } + resources { + requests = { + cpu = "10m" + memory = "64Mi" + } + limits = { + memory = "64Mi" + } + } + } + + container { + name = "prometheus-exporter" + image = "mindflavor/prometheus-wireguard-exporter" + image_pull_policy = "IfNotPresent" + command = ["prometheus_wireguard_exporter", "-a", "true", "-v", "true", "-n", "/etc/wireguard/wg0.conf"] + volume_mount { + name = "wg0-conf" + mount_path = "/etc/wireguard/wg0.conf" + sub_path = "wg0.conf" + } + security_context { + capabilities { + add = ["NET_ADMIN"] + } + } + port { + container_port = 9586 + protocol = "TCP" + } + resources { + requests = { + cpu = "10m" + memory = "32Mi" + } + limits = { + memory = "32Mi" + } + } + } + volume { + name = "wg0-key" + secret { + secret_name = "wg0-key" + } + } + volume { + name = "wg0-conf" + config_map { + name = "wg0-conf" 
+ } + } + dns_config { + option { + name = "ndots" + value = "2" + } + } + } + } + } +} + +resource "kubernetes_service" "wireguard" { + metadata { + name = "wireguard" + namespace = kubernetes_namespace.wireguard.metadata[0].name + annotations = { + "metallb.universe.tf/allow-shared-ip" = "shared" + } + labels = { + "app" = "wireguard" + } + } + + spec { + type = "LoadBalancer" + external_traffic_policy = "Cluster" + selector = { + app = "wireguard" + } + port { + port = "51820" + protocol = "UDP" + } + } +} + + +resource "kubernetes_service" "wireguard_exporter" { + metadata { + name = "wireguard-exporter" + namespace = kubernetes_namespace.wireguard.metadata[0].name + labels = { + "app" = "wireguard-exporter" + } + } + + spec { + selector = { + app = "wireguard" + } + port { + port = "9102" + target_port = "9586" + } + } +} diff --git a/stacks/wireguard/secrets b/stacks/wireguard/secrets new file mode 120000 index 00000000..ca54a7cf --- /dev/null +++ b/stacks/wireguard/secrets @@ -0,0 +1 @@ +../../secrets \ No newline at end of file diff --git a/stacks/wireguard/terragrunt.hcl b/stacks/wireguard/terragrunt.hcl new file mode 100644 index 00000000..4f16dddf --- /dev/null +++ b/stacks/wireguard/terragrunt.hcl @@ -0,0 +1,8 @@ +include "root" { + path = find_in_parent_folders() +} + +dependency "infra" { + config_path = "../infra" + skip_outputs = true +} diff --git a/stacks/wireguard/tiers.tf b/stacks/wireguard/tiers.tf new file mode 100644 index 00000000..eb0f8083 --- /dev/null +++ b/stacks/wireguard/tiers.tf @@ -0,0 +1,10 @@ +# Generated by Terragrunt. 
Sig: nIlQXj57tbuaRZEa +locals { + tiers = { + core = "0-core" + cluster = "1-cluster" + gpu = "2-gpu" + edge = "3-edge" + aux = "4-aux" + } +} diff --git a/stacks/xray/main.tf b/stacks/xray/main.tf new file mode 100644 index 00000000..ef900518 --- /dev/null +++ b/stacks/xray/main.tf @@ -0,0 +1,21 @@ +variable "tls_secret_name" { type = string } + +data "vault_kv_secret_v2" "secrets" { + mount = "secret" + name = "platform" +} + +locals { + xray_reality_clients = jsondecode(data.vault_kv_secret_v2.secrets.data["xray_reality_clients"]) + xray_reality_short_ids = jsondecode(data.vault_kv_secret_v2.secrets.data["xray_reality_short_ids"]) +} + +module "xray" { + source = "./modules/xray" + tls_secret_name = var.tls_secret_name + tier = local.tiers.core + + xray_reality_clients = local.xray_reality_clients + xray_reality_private_key = data.vault_kv_secret_v2.secrets.data["xray_reality_private_key"] + xray_reality_short_ids = local.xray_reality_short_ids +} diff --git a/stacks/xray/modules/xray/main.tf b/stacks/xray/modules/xray/main.tf new file mode 100644 index 00000000..ae258875 --- /dev/null +++ b/stacks/xray/modules/xray/main.tf @@ -0,0 +1,239 @@ +variable "tls_secret_name" {} +variable "tier" { type = string } +variable "xray_reality_clients" { type = list(map(string)) } +variable "xray_reality_private_key" { + type = string + sensitive = true +} +variable "xray_reality_short_ids" { type = list(string) } + +# Github repo - https://github.com/teddysun/across/blob/master/docker/xray/README.md +# Clients: +# iOS - OneXRay - https://github.com/OneXray/OneXray +# MacOS - V2BOX + + +module "tls_secret" { + source = "../../../../modules/kubernetes/setup_tls_secret" + namespace = kubernetes_namespace.xray.metadata[0].name + tls_secret_name = var.tls_secret_name +} + +resource "kubernetes_namespace" "xray" { + metadata { + name = "xray" + labels = { + tier = var.tier + } + } +} + +resource "kubernetes_config_map" "xray_config" { + metadata { + name = "xray-config" + 
namespace = kubernetes_namespace.xray.metadata[0].name + + labels = { + app = "xray" + } + annotations = { + "reloader.stakater.com/match" = "true" + } + } + + data = { + "config.json" = templatefile("${path.module}/xray_config.json.tpl", { + clients = jsonencode(var.xray_reality_clients) + reality_private_key = var.xray_reality_private_key + reality_short_ids = jsonencode(var.xray_reality_short_ids) + }) + } +} + +resource "kubernetes_deployment" "xray" { + metadata { + name = "xray" + namespace = kubernetes_namespace.xray.metadata[0].name + labels = { + app = "xray" + tier = var.tier + } + annotations = { + "reloader.stakater.com/search" = "true" + } + } + spec { + replicas = 1 + strategy { + rolling_update { + max_surge = "2" + max_unavailable = "0" + } + } + selector { + match_labels = { + app = "xray" + } + } + template { + metadata { + labels = { + app = "xray" + } + } + spec { + container { + image = "teddysun/xray" + name = "xray" + image_pull_policy = "IfNotPresent" + port { + container_port = 6443 // vless + protocol = "TCP" + } + port { + container_port = 7443 // reality + protocol = "TCP" + } + port { + container_port = 8443 // websocket + protocol = "TCP" + } + port { + container_port = 9443 // gRPC + protocol = "TCP" + } + volume_mount { + name = "tls" + mount_path = "/etc/xray/tls.crt" + sub_path = "tls.crt" + } + volume_mount { + name = "tls" + mount_path = "/etc/xray/tls.key" + sub_path = "tls.key" + } + volume_mount { + name = "config" + mount_path = "/etc/xray/config.json" + sub_path = "config.json" + } + resources { + requests = { + cpu = "10m" + memory = "64Mi" + } + limits = { + memory = "64Mi" + } + } + } + + volume { + name = "tls" + secret { + secret_name = var.tls_secret_name + } + } + volume { + name = "config" + config_map { + name = "xray-config" + } + } + dns_config { + option { + name = "ndots" + value = "2" + } + } + } + } + } +} + +resource "kubernetes_service" "xray" { + metadata { + name = "xray" + namespace = 
kubernetes_namespace.xray.metadata[0].name + labels = { + "app" = "xray" + } + } + + spec { + selector = { + app = "xray" + } + port { + name = "vless" + port = 6443 + protocol = "TCP" + } + port { + name = "websocket" + port = 8443 + protocol = "TCP" + } + port { + name = "grpc" + port = 9443 + protocol = "TCP" + } + } +} + +resource "kubernetes_service" "xray-reality" { + metadata { + name = "xray-reality" + namespace = kubernetes_namespace.xray.metadata[0].name + labels = { + "app" = "xray" + } + } + + spec { + type = "LoadBalancer" + selector = { + app = "xray" + } + port { + name = "reality" + port = 7443 + protocol = "TCP" + } + } +} + +module "ingress_ws" { + source = "../../../../modules/kubernetes/ingress_factory" + namespace = kubernetes_namespace.xray.metadata[0].name + name = "xray-ws" + service_name = "xray" + host = "xray-ws" + port = 8443 + tls_secret_name = var.tls_secret_name +} + +module "ingress_grpc" { + source = "../../../../modules/kubernetes/ingress_factory" + namespace = kubernetes_namespace.xray.metadata[0].name + name = "xray-grpc" + service_name = "xray" + host = "xray-grpc" + port = 9443 + tls_secret_name = var.tls_secret_name + ingress_path = ["/grpc-vpn"] + extra_annotations = { + "traefik.ingress.kubernetes.io/service.serversscheme" = "h2c" + } +} + +module "ingress_vless" { + source = "../../../../modules/kubernetes/ingress_factory" + namespace = kubernetes_namespace.xray.metadata[0].name + name = "xray-vless" + service_name = "xray" + host = "xray-vless" + port = 6443 + tls_secret_name = var.tls_secret_name +} diff --git a/stacks/xray/modules/xray/xray_config.json.tpl b/stacks/xray/modules/xray/xray_config.json.tpl new file mode 100644 index 00000000..a3cefd3a --- /dev/null +++ b/stacks/xray/modules/xray/xray_config.json.tpl @@ -0,0 +1,80 @@ +{ + "log": { + "loglevel": "none" + }, + "inbounds": [ + { + "port": 7443, + "protocol": "vless", + "settings": { + "clients": ${clients}, + "decryption": "none" + }, + "streamSettings": { + 
"network": "tcp", + "security": "reality", + "realitySettings": { + "show": false, + "dest": "www.cloudflare.com:443", + "xver": 0, + "serverNames": [ + "www.cloudflare.com" + ], + "privateKey": "${reality_private_key}", + "shortIds": ${reality_short_ids} + } + } + }, + { + "port": 8443, + "protocol": "vless", + "settings": { + "clients": ${clients}, + "decryption": "none" + }, + "streamSettings": { + "network": "ws", + "security": "none", + "tlsSettings": { + "certificates": [ + { + "certificateFile": "/etc/xray/tls.crt", + "keyFile": "/etc/xray/tls.key" + } + ] + }, + "wsSettings": { + "path": "/ws" + } + } + }, + { + "port": 9443, + "protocol": "vless", + "settings": { + "clients": ${clients}, + "decryption": "none" + }, + "streamSettings": { + "network": "xhttp", + "security": "none", + "tlsSettings": { + "certificates": [ + { + "certificateFile": "/etc/xray/tls.crt", + "keyFile": "/etc/xray/tls.key" + } + ] + }, + "xhttpSettings": { + "path": "/grpc-vpn" + } + } + } + ], + "outbounds": [ + { + "protocol": "freedom" + } + ] +} diff --git a/stacks/xray/secrets b/stacks/xray/secrets new file mode 120000 index 00000000..ca54a7cf --- /dev/null +++ b/stacks/xray/secrets @@ -0,0 +1 @@ +../../secrets \ No newline at end of file diff --git a/stacks/xray/terragrunt.hcl b/stacks/xray/terragrunt.hcl new file mode 100644 index 00000000..4f16dddf --- /dev/null +++ b/stacks/xray/terragrunt.hcl @@ -0,0 +1,8 @@ +include "root" { + path = find_in_parent_folders() +} + +dependency "infra" { + config_path = "../infra" + skip_outputs = true +} diff --git a/stacks/xray/tiers.tf b/stacks/xray/tiers.tf new file mode 100644 index 00000000..eb0f8083 --- /dev/null +++ b/stacks/xray/tiers.tf @@ -0,0 +1,10 @@ +# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa +locals { + tiers = { + core = "0-core" + cluster = "1-cluster" + gpu = "2-gpu" + edge = "3-edge" + aux = "4-aux" + } +}