diff --git a/docs/architecture/authentication.md b/docs/architecture/authentication.md index bb6a7289..42d79561 100644 --- a/docs/architecture/authentication.md +++ b/docs/architecture/authentication.md @@ -40,8 +40,8 @@ graph TB | Component | Version | Location | Purpose | |-----------|---------|----------|---------| -| Authentik Server | Latest | `stacks/authentik/` | Core IdP application servers (3 replicas) | -| Authentik Worker | Latest | `stacks/authentik/` | Background task processors (3 replicas) | +| Authentik Server | 2026.2.2 | `stacks/authentik/` | Core IdP application servers (2 replicas) | +| Authentik Worker | 2026.2.2 | `stacks/authentik/` | Background task processors (2 replicas) | | PgBouncer | Latest | `stacks/authentik/` | PostgreSQL connection pooler (3 replicas) | | Embedded Outpost | - | Built into Authentik | Forward auth endpoint for Traefik | | Traefik ForwardAuth | - | `ingress_factory` module | Middleware for protected ingresses | diff --git a/stacks/authentik/modules/authentik/main.tf b/stacks/authentik/modules/authentik/main.tf index 7fd9c685..225daf7a 100644 --- a/stacks/authentik/modules/authentik/main.tf +++ b/stacks/authentik/modules/authentik/main.tf @@ -55,21 +55,23 @@ resource "helm_release" "authentik" { repository = "https://charts.goauthentik.io/" chart = "authentik" - # version = "2025.8.1" - version = "2025.10.3" + # version = "2025.10.3" + # version = "2025.12.4" + version = "2026.2.2" atomic = true timeout = 6000 - values = [templatefile("${path.module}/values.yaml", { postgres_password = var.postgres_password, secret_key = var.secret_key, redis_host = var.redis_host })] + values = [templatefile("${path.module}/values.yaml", { postgres_password = var.postgres_password, secret_key = var.secret_key })] } module "ingress" { - source = "../../../../modules/kubernetes/ingress_factory" - namespace = kubernetes_namespace.authentik.metadata[0].name - name = "authentik" - service_name = "goauthentik-server" - tls_secret_name = var.tls_secret_name + source = "../../../../modules/kubernetes/ingress_factory" + namespace = kubernetes_namespace.authentik.metadata[0].name + name = "authentik" + service_name = "goauthentik-server" + tls_secret_name = var.tls_secret_name + anti_ai_scraping = false extra_annotations = { "gethomepage.dev/enabled" = "true" "gethomepage.dev/name" = "Authentik" @@ -84,12 +86,14 @@ module "ingress" { } module "ingress-outpost" { - source = "../../../../modules/kubernetes/ingress_factory" - namespace = kubernetes_namespace.authentik.metadata[0].name - name = "authentik-outpost" - host = "authentik" - service_name = "ak-outpost-authentik-embedded-outpost" - port = 9000 - ingress_path = ["/outpost.goauthentik.io"] - tls_secret_name = var.tls_secret_name + source = "../../../../modules/kubernetes/ingress_factory" + namespace = kubernetes_namespace.authentik.metadata[0].name + name = "authentik-outpost" + host = "authentik" + service_name = "ak-outpost-authentik-embedded-outpost" + port = 9000 + ingress_path = ["/outpost.goauthentik.io"] + tls_secret_name = var.tls_secret_name + anti_ai_scraping = false + exclude_crowdsec = true } diff --git a/stacks/authentik/modules/authentik/pgbouncer.ini b/stacks/authentik/modules/authentik/pgbouncer.ini index e3dc4e4f..8148ab53 100644 --- a/stacks/authentik/modules/authentik/pgbouncer.ini +++ b/stacks/authentik/modules/authentik/pgbouncer.ini @@ -6,7 +6,7 @@ listen_addr = 0.0.0.0 listen_port = 6432 auth_type = md5 auth_file = /etc/pgbouncer/userlist.txt -pool_mode = transaction +pool_mode = session max_client_conn = 200 default_pool_size = 20 reserve_pool_size = 5 diff --git a/stacks/authentik/modules/authentik/values.yaml b/stacks/authentik/modules/authentik/values.yaml index 55624c35..2827a5a2 100644 --- a/stacks/authentik/modules/authentik/values.yaml +++ b/stacks/authentik/modules/authentik/values.yaml @@ -1,19 +1,19 @@ authentik: log_level: warning # log_level: trace - secret_key: "${secret_key}" + secret_key: "" + existingSecret: + secretName: "goauthentik" # This sends anonymous usage-data, stack traces on errors and # performance data to authentik.error-reporting.a7k.io, and is fully opt-in error_reporting: - enabled: true + enabled: false postgresql: # host: postgresql.dbaas host: pgbouncer.authentik port: 6432 user: authentik - password: ${postgres_password} - redis: - host: ${redis_host} + password: "" server: replicas: 2 @@ -58,9 +58,9 @@ worker: resources: requests: cpu: 100m - memory: 1Gi + memory: 1.5Gi limits: - memory: 1Gi + memory: 1.5Gi topologySpreadConstraints: - maxSkew: 1 topologyKey: kubernetes.io/hostname @@ -71,3 +71,6 @@ worker: pdb: enabled: true maxUnavailable: 1 + +postgresql: + enabled: false diff --git a/stacks/diun/main.tf b/stacks/diun/main.tf index 3e2af67d..04af08e6 100644 --- a/stacks/diun/main.tf +++ b/stacks/diun/main.tf @@ -41,44 +41,6 @@ resource "kubernetes_manifest" "external_secret" { depends_on = [kubernetes_namespace.diun] } -resource "kubernetes_manifest" "external_secret_git" { - manifest = { - apiVersion = "external-secrets.io/v1beta1" - kind = "ExternalSecret" - metadata = { - name = "diun-git-secrets" - namespace = "diun" - } - spec = { - refreshInterval = "15m" - secretStoreRef = { - name = "vault-kv" - kind = "ClusterSecretStore" - } - target = { - name = "diun-git-secrets" - } - data = [ - { - secretKey = "git_token" - remoteRef = { - key = "viktor" - property = "webhook_handler_git_token" - } - }, - { - secretKey = "git_user" - remoteRef = { - key = "viktor" - property = "webhook_handler_git_user" - } - } - ] - } - } - depends_on = [kubernetes_namespace.diun] -} - module "tls_secret" { source = "../../modules/kubernetes/setup_tls_secret" namespace = kubernetes_namespace.diun.metadata[0].name @@ -119,28 +81,6 @@ resource "kubernetes_cluster_role_binding" "diun" { } } -resource "kubernetes_persistent_volume_claim" "repo" { - wait_until_bound = false - metadata { - name = "diun-repo" - namespace = kubernetes_namespace.diun.metadata[0].name - annotations = { - "resize.topolvm.io/threshold" = "80%" - "resize.topolvm.io/increase" = "100%" - "resize.topolvm.io/storage_limit" = "5Gi" - } - } - spec { - access_modes = ["ReadWriteOnce"] - storage_class_name = "proxmox-lvm" - resources { - requests = { - storage = "1Gi" - } - } - } -} - resource "kubernetes_persistent_volume_claim" "data_proxmox" { wait_until_bound = false metadata { @@ -163,81 +103,6 @@ resource "kubernetes_persistent_volume_claim" "data_proxmox" { } } -resource "kubernetes_config_map_v1" "auto_update_script" { - metadata { - name = "diun-auto-update-script" - namespace = kubernetes_namespace.diun.metadata[0].name - } - data = { - "auto-update.sh" = <<-SCRIPT - #!/bin/sh - set -e - - # Only act on updates (not new or unchanged) - [ "$$DIUN_ENTRY_STATUS" = "update" ] || exit 0 - - IMAGE="$$DIUN_ENTRY_IMAGE" - NEW_TAG="$$DIUN_ENTRY_IMAGETAG" - - echo "[auto-update] Detected update: $$IMAGE -> $$NEW_TAG" - - # Skip databases - case "$$IMAGE" in - *postgres*|*mysql*|*redis*|*clickhouse*|*etcd*) echo "[auto-update] Skipping database image"; exit 0 ;; - esac - - # Skip custom images (handled by CI/CD) - case "$$IMAGE" in - viktorbarzin/*|registry.viktorbarzin.me/*|ancamilea/*|mghee/*) echo "[auto-update] Skipping CI/CD-managed image"; exit 0 ;; - esac - - # Skip kube-system / infrastructure images - case "$$IMAGE" in - registry.k8s.io/*|quay.io/tigera/*|quay.io/metallb/*|nvcr.io/*|reg.kyverno.io/*) echo "[auto-update] Skipping infrastructure image"; exit 0 ;; - esac - - # Acquire lock (serialize concurrent DIUN notifications) - exec 200>/tmp/auto-update.lock - flock -n 200 || { echo "[auto-update] Another update in progress, skipping"; exit 0; } - - cd /repo - - # Configure git - git config user.email "diun@viktorbarzin.me" - git config user.name "DIUN Auto-Update" - - # Pull latest using HTTPS with token - git remote set-url origin "https://$${GIT_USER}:$${GIT_TOKEN}@github.com/ViktorBarzin/infra.git" - git pull --rebase origin master || { echo "[auto-update] git pull failed"; exit 1; } - - # Find .tf files containing this image - MATCHES=$$(grep -rl "\"$${IMAGE}:" stacks/ --include="*.tf" 2>/dev/null || true) - [ -z "$$MATCHES" ] && { echo "[auto-update] No .tf file found for $$IMAGE"; exit 0; } - - # Update the image tag in each matching file - UPDATED=0 - for FILE in $$MATCHES; do - if sed -i "s|\"$${IMAGE}:[^\"]*\"|\"$${IMAGE}:$${NEW_TAG}\"|g" "$$FILE"; then - echo "[auto-update] Updated $$FILE" - UPDATED=1 - fi - done - - # Check if anything actually changed - if git diff --quiet; then - echo "[auto-update] No changes after update for $$IMAGE:$$NEW_TAG (already up to date)" - exit 0 - fi - - # Commit and push - git add -A stacks/ - git commit -m "auto-update: $${IMAGE} -> $${NEW_TAG}" - git push origin master - echo "[auto-update] Pushed update: $${IMAGE}:$${NEW_TAG}" - SCRIPT - } -} - resource "kubernetes_deployment" "diun" { metadata { name = "diun" @@ -269,50 +134,6 @@ resource "kubernetes_deployment" "diun" { } spec { service_account_name = "diun" - init_container { - name = "clone-repo" - image = "alpine/git:latest" - command = ["/bin/sh", "-c"] - args = [<<-EOF - if [ -d /repo/.git ]; then - cd /repo && git pull --rebase origin master || true - else - git clone https://$${GIT_USER}:$${GIT_TOKEN}@github.com/ViktorBarzin/infra.git /repo - fi - EOF - ] - env { - name = "GIT_USER" - value_from { - secret_key_ref { - name = "diun-git-secrets" - key = "git_user" - } - } - } - env { - name = "GIT_TOKEN" - value_from { - secret_key_ref { - name = "diun-git-secrets" - key = "git_token" - } - } - } - volume_mount { - name = "repo" - mount_path = "/repo" - } - resources { - requests = { - cpu = "10m" - memory = "64Mi" - } - limits = { - memory = "128Mi" - } - } - } container { image = "viktorbarzin/diun:latest" name = "diun" @@ -349,12 +170,25 @@ resource "kubernetes_deployment" "diun" { name = "DIUN_DEFAULTS_SORTTAGS" value = "reverse" } - # Script notifier for auto-updates + # Webhook notifier for upgrade agent (via n8n) env { - name = "DIUN_NOTIF_SCRIPT_CMD" - value = "/scripts/auto-update.sh" + name = "DIUN_NOTIF_WEBHOOK_ENDPOINT" + value_from { + secret_key_ref { + name = "diun-secrets" + key = "n8n_webhook_url" + } + } } - # Slack notifier (kept alongside script notifier) + env { + name = "DIUN_NOTIF_WEBHOOK_METHOD" + value = "POST" + } + env { + name = "DIUN_NOTIF_WEBHOOK_HEADERS_CONTENT-TYPE" + value = "application/json" + } + # Slack notifier (independent notification channel) env { name = "DIUN_NOTIF_SLACK_WEBHOOKURL" value_from { @@ -364,25 +198,6 @@ resource "kubernetes_deployment" "diun" { } } } - # Git credentials for auto-update script - env { - name = "GIT_USER" - value_from { - secret_key_ref { - name = "diun-git-secrets" - key = "git_user" - } - } - } - env { - name = "GIT_TOKEN" - value_from { - secret_key_ref { - name = "diun-git-secrets" - key = "git_token" - } - } - } env { name = "LOG_LEVEL" value = "debug" @@ -391,14 +206,6 @@ resource "kubernetes_deployment" "diun" { name = "data" mount_path = "/data" } - volume_mount { - name = "scripts" - mount_path = "/scripts" - } - volume_mount { - name = "repo" - mount_path = "/repo" - } resources { requests = { cpu = "10m" @@ -415,19 +222,6 @@ resource "kubernetes_deployment" "diun" { claim_name = kubernetes_persistent_volume_claim.data_proxmox.metadata[0].name } } - volume { - name = "scripts" - config_map { - name = kubernetes_config_map_v1.auto_update_script.metadata[0].name - default_mode = "0755" - } - } - volume { - name = "repo" - persistent_volume_claim { - claim_name = kubernetes_persistent_volume_claim.repo.metadata[0].name - } - } } } } diff --git a/stacks/mailserver/main.tf b/stacks/mailserver/main.tf index 9ca6a504..da131f8d 100644 --- a/stacks/mailserver/main.tf +++ b/stacks/mailserver/main.tf @@ -34,6 +34,6 @@ module "mailserver" { sasl_passwd = local.mailserver_sasl_passwd roundcube_db_password = data.vault_kv_secret_v2.secrets.data["mailserver_roundcubemail_db_password"] tier = local.tiers.edge - mailgun_api_key = data.vault_kv_secret_v2.viktor.data["mailgun_api_key"] + brevo_api_key = jsondecode(base64decode(data.vault_kv_secret_v2.viktor.data["brevo_api_key"]))["api_key"] email_monitor_imap_password = local.mailserver_accounts["spam@viktorbarzin.me"] } diff --git a/stacks/mailserver/modules/mailserver/main.tf b/stacks/mailserver/modules/mailserver/main.tf index 81308138..93069515 100644 --- a/stacks/mailserver/modules/mailserver/main.tf +++ b/stacks/mailserver/modules/mailserver/main.tf @@ -5,7 +5,7 @@ variable "postfix_account_aliases" {} variable "opendkim_key" {} variable "sasl_passwd" {} # For sendgrid i.e relayhost variable "nfs_server" { type = string } -variable "mailgun_api_key" { +variable "brevo_api_key" { type = string sensitive = true } @@ -537,7 +537,7 @@ resource "kubernetes_service" "mailserver" { # ============================================================================= # E2E Email Roundtrip Monitor -# Sends test email via Mailgun API, verifies delivery via IMAP, pushes metrics +# Sends test email via Brevo API, verifies delivery via IMAP, pushes metrics # ============================================================================= resource "kubernetes_cron_job_v1" "email_roundtrip_monitor" { metadata { @@ -562,9 +562,9 @@ resource "kubernetes_cron_job_v1" "email_roundtrip_monitor" { image = "docker.io/library/python:3.12-alpine" command = ["/bin/sh", "-c", <<-EOT pip install --quiet --disable-pip-version-check requests && python3 -c ' -import requests, imaplib, email, time, os, uuid, sys, ssl +import requests, imaplib, email, time, os, uuid, sys, ssl, json -MAILGUN_API_KEY = os.environ["MAILGUN_API_KEY"] +BREVO_API_KEY = os.environ["BREVO_API_KEY"] IMAP_USER = "spam@viktorbarzin.me" IMAP_PASS = os.environ["EMAIL_MONITOR_IMAP_PASSWORD"] IMAP_HOST = "mailserver.mailserver.svc.cluster.local" @@ -578,20 +578,24 @@ success = 0 duration = 0 try: - # Step 1: Send via Mailgun HTTP API to smoke-test@ (hits catch-all -> spam@) + # Step 1: Send via Brevo Transactional Email API to smoke-test@ (hits catch-all -> spam@) resp = requests.post( - f"https://api.eu.mailgun.net/v3/{DOMAIN}/messages", - auth=("api", MAILGUN_API_KEY), - data={ - "from": f"monitoring@{DOMAIN}", - "to": f"smoke-test@{DOMAIN}", + "https://api.brevo.com/v3/smtp/email", + headers={ + "api-key": BREVO_API_KEY, + "Content-Type": "application/json", + "Accept": "application/json", + }, + json={ + "sender": {"name": "Monitoring", "email": f"monitoring@{DOMAIN}"}, + "to": [{"email": f"smoke-test@{DOMAIN}"}], "subject": subject, - "text": f"E2E email monitoring probe {marker}. Auto-generated, will be deleted.", + "textContent": f"E2E email monitoring probe {marker}. Auto-generated, will be deleted.", }, timeout=30, ) resp.raise_for_status() - print(f"Sent test email via Mailgun: {resp.status_code} marker={marker}") + print(f"Sent test email via Brevo: {resp.status_code} marker={marker}") # Step 2: Wait for delivery, retry IMAP up to 3 min ctx = ssl.create_default_context() @@ -667,8 +671,8 @@ sys.exit(0 if success else 1) EOT ] env { - name = "MAILGUN_API_KEY" - value = var.mailgun_api_key + name = "BREVO_API_KEY" + value = var.brevo_api_key } env { name = "EMAIL_MONITOR_IMAP_PASSWORD" diff --git a/stacks/meshcentral/main.tf b/stacks/meshcentral/main.tf index 6dc7e1fa..6757b651 100644 --- a/stacks/meshcentral/main.tf +++ b/stacks/meshcentral/main.tf @@ -114,19 +114,43 @@ resource "kubernetes_deployment" "meshcentral" { image_pull_policy = "IfNotPresent" command = ["/bin/sh"] args = ["-c", <<-EOT -if [ -f /opt/meshcentral/meshcentral-data/config.json ]; then +CONFIG=/opt/meshcentral/meshcentral-data/config.json +if [ -f "$CONFIG" ]; then # Disable certUrl when using Traefik reverse proxy with TLS offload - sed -i 's/"certUrl":/"_certUrl":/g' /opt/meshcentral/meshcentral-data/config.json + sed -i 's/"certUrl":/"_certUrl":/g' "$CONFIG" # Fix WebRTC value from string to boolean - sed -i 's/"WebRTC": "[^"]*"/"WebRTC": false/g' /opt/meshcentral/meshcentral-data/config.json + sed -i 's/"WebRTC": "[^"]*"/"WebRTC": false/g' "$CONFIG" # Ensure TLSOffload is enabled (Traefik terminates TLS, MeshCentral serves HTTP on 443) - # Re-enable if previously disabled by restoring _TLSOffload back to TLSOffload - sed -i 's/"_TLSOffload":/"TLSOffload":/g' /opt/meshcentral/meshcentral-data/config.json - # Set TLSOffload to true (accepts any reverse proxy) - sed -i 's/"TLSOffload": "[^"]*"/"TLSOffload": true/g' /opt/meshcentral/meshcentral-data/config.json - sed -i 's/"TLSOffload": false/"TLSOffload": true/g' /opt/meshcentral/meshcentral-data/config.json + sed -i 's/"_TLSOffload":/"TLSOffload":/g' "$CONFIG" + sed -i 's/"TLSOffload": "[^"]*"/"TLSOffload": true/g' "$CONFIG" + sed -i 's/"TLSOffload": false/"TLSOffload": true/g' "$CONFIG" +else + # First run: create config from template before startup.sh runs, so REVERSE_PROXY + # env var doesn't generate a bad certUrl. Pre-seed with correct values. + cat > "$CONFIG" <<'CONF' +{ + "$schema": "http://info.meshcentral.com/downloads/meshcentral-config-schema.json", + "settings": { + "cert": "meshcentral.viktorbarzin.me", + "_WANonly": true, + "_LANonly": true, + "port": 443, + "redirPort": 80, + "AgentPong": 300, + "TLSOffload": true, + "SelfUpdate": false, + "AllowFraming": false, + "WebRTC": false + }, + "domains": { + "": { + "NewAccounts": false + } + } +} +CONF fi EOT ] @@ -153,7 +177,7 @@ EOT } env { name = "REVERSE_PROXY" - value = "true" + value = "false" } env { name = "ALLOW_NEW_ACCOUNTS" diff --git a/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl b/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl index 7630a821..d0fcfcb6 100755 --- a/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl +++ b/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl @@ -1594,10 +1594,10 @@ serverFiles: - alert: HighServiceLatency expr: | ( - sum(rate(traefik_service_request_duration_seconds_sum{service!~".*idrac.*|.*headscale.*|.*authentik.*"}[5m])) by (service) - / sum(rate(traefik_service_request_duration_seconds_count{service!~".*idrac.*|.*headscale.*|.*authentik.*"}[5m])) by (service) + sum(rate(traefik_service_request_duration_seconds_sum{service!~".*idrac.*|.*headscale.*"}[5m])) by (service) + / sum(rate(traefik_service_request_duration_seconds_count{service!~".*idrac.*|.*headscale.*"}[5m])) by (service) ) > 10 - and sum(rate(traefik_service_request_duration_seconds_count{service!~".*idrac.*|.*headscale.*|.*authentik.*"}[5m])) by (service) > 0.01 + and sum(rate(traefik_service_request_duration_seconds_count{service!~".*idrac.*|.*headscale.*"}[5m])) by (service) > 0.01 and on() (time() - process_start_time_seconds{job="prometheus"}) > 900 for: 5m labels: diff --git a/stacks/nextcloud/chart_values.yaml b/stacks/nextcloud/chart_values.yaml index 66dd8c7f..e54855d5 100644 --- a/stacks/nextcloud/chart_values.yaml +++ b/stacks/nextcloud/chart_values.yaml @@ -30,12 +30,14 @@ nextcloud: zzz-redis.config.php: |