infra/stacks/authentik/modules/authentik/values.yaml
Viktor Barzin c3d0c121bb
All checks were successful
ci/woodpecker/push/default Pipeline was successful
feat(authentik): wire SMTP (noreply@) for TripIt signup verification + recovery email (ADR-0020)
Authentik email was unconfigured (localhost), so the TripIt enrollment flow's email-verification stage couldn't send. Add AUTHENTIK_EMAIL__* to server.env + worker.env pointing at the in-cluster mailserver as noreply@viktorbarzin.me (587/STARTTLS), with the SASL password synced from Vault secret/authentik.smtp_password via a new authentik-email ExternalSecret (reloader-annotated). Image pin unchanged (2026.2.4 == live). Prereq for the tripit-enrollment flow.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-17 07:04:52 +00:00

185 lines
7.1 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

authentik:
  # WARNING: existingSecret is set below, so the chart skips rendering the
  # authentik.* values into an AUTHENTIK_* env Secret. The live environment
  # is supplied by the orphaned, helm-keep-policy `goauthentik` Secret that
  # chart 2025.10.3 created, which makes everything under authentik.* in
  # this file effectively INERT. New or tuned settings MUST be added through
  # server.env / worker.env (see .claude/reference/authentik-state.md).
  log_level: warning
  # log_level: trace
  secret_key: ""
  existingSecret:
    secretName: "goauthentik"
  # Error reporting ships anonymous usage data, error stack traces and
  # performance data to authentik.error-reporting.a7k.io. It is strictly
  # opt-in and stays disabled here.
  error_reporting:
    enabled: false
  postgresql:
    # host: postgresql.dbaas
    host: pgbouncer.authentik
    port: 6432
    user: authentik
    password: ""
server:
  replicas: 3
  env:
    # Cap anonymous Django sessions (no completed login: bots,
    # healthcheckers, partial flows) at 2h instead of the default days=1.
    # After a login completes, UserLoginStage.session_duration takes over
    # via request.session.set_expiry. Set here rather than under
    # authentik.sessions.* because authentik.existingSecret.secretName makes
    # the chart skip rendering the AUTHENTIK_* secret, so the values block
    # never reaches the environment.
    - name: AUTHENTIK_SESSIONS__UNAUTHENTICATED_AGE
      value: "hours=2"
    # Gunicorn sizing: 3 workers x 4 threads per server pod (defaults 2x4).
    # Sized against the 2Gi server memory limit below; each worker preloads
    # Django (~500Mi).
    - name: AUTHENTIK_WEB__WORKERS
      value: "3"
    - name: AUTHENTIK_WEB__THREADS
      value: "4"
    # Flow plans are cached for 30m and policy evaluations for 15m (both
    # default to 300s). Authentik 2026.2 keeps its cache in Postgres, so a
    # cache hit still costs a SELECT, but one indexed lookup is far cheaper
    # than re-planning the flow (~70 sequential ORM ops per flow stage POST).
    - name: AUTHENTIK_CACHE__TIMEOUT_FLOWS
      value: "1800"
    - name: AUTHENTIK_CACHE__TIMEOUT_POLICIES
      value: "900"
    # SMTP for signup verification + recovery email (tripit ADR-0020).
    # Mail goes out through the in-cluster mailserver as
    # noreply@viktorbarzin.me (SASL, 587/STARTTLS). The password comes from
    # the authentik-email ExternalSecret (Vault
    # secret/authentik.smtp_password). Must be set on server AND worker: the
    # worker runs the email tasks, the server validates the Email stage
    # config.
    - name: AUTHENTIK_EMAIL__HOST
      value: "mailserver.mailserver.svc.cluster.local"
    - name: AUTHENTIK_EMAIL__PORT
      value: "587"
    - name: AUTHENTIK_EMAIL__USE_TLS
      value: "true"
    - name: AUTHENTIK_EMAIL__USERNAME
      value: "noreply@viktorbarzin.me"
    - name: AUTHENTIK_EMAIL__FROM
      value: "TripIt <noreply@viktorbarzin.me>"
    - name: AUTHENTIK_EMAIL__PASSWORD
      valueFrom:
        secretKeyRef:
          name: authentik-email
          key: AUTHENTIK_EMAIL__PASSWORD
    # Do NOT add AUTHENTIK_POSTGRESQL__CONN_MAX_AGE to this list. PgBouncer
    # runs in session mode, so every persistent Django connection pins a
    # server connection 1:1. During the 2026-06-10 rolling restart that
    # saturated the 3x(20+5) pool (58s pool waits, readiness flapping, and
    # the shared CNPG primary failed over mid-storm). Saving ~1-2ms of
    # connection setup per request is not worth that risk on the shared PG
    # substrate.
  # Liveness budget sized for slow boots (2026-06-10 incident). During a
  # rolling restart pods queue on authentik's DB migration lock, and the go
  # layer answers /-/health/live before the core is up. With the default
  # 3x10s budget kubelet kill-looped every booting pod and amplified the
  # contention. The startup probe still bounds total boot time (60x10s).
  livenessProbe:
    failureThreshold: 6
    timeoutSeconds: 5
  # NOTE(review): the upstream authentik chart appears to document this key
  # as `deploymentStrategy`; confirm that `strategy` is honored by the chart
  # version in use, otherwise this stanza is inert.
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 0
      maxUnavailable: 1
  resources:
    requests:
      cpu: 100m
      memory: 1.5Gi
    limits:
      memory: 2Gi
  topologySpreadConstraints:
    - maxSkew: 1
      topologyKey: kubernetes.io/hostname
      whenUnsatisfiable: ScheduleAnyway
      labelSelector:
        matchLabels:
          app.kubernetes.io/component: server
  ingress:
    enabled: false
    # hosts:
    #   - authentik.viktorbarzin.me
  podAnnotations:
    # Annotation values must be strings, so the flag is quoted rather than
    # left as a YAML boolean.
    diun.enable: "true"
    # Only the server is annotated; the worker uses the same image. The dot
    # after the year is escaped so it matches a literal "." rather than any
    # character (single quotes keep the backslash literal).
    diun.include_tags: '^202[0-9]\.[0-9]+.*$'
  pdb:
    enabled: true
    minAvailable: 2
global:
  addPrometheusAnnotations: true
  image:
    # Pinned to the tag Keel currently has live. Keel (diun-annotated,
    # keel.sh/enrolled namespace) bumps the IMAGE between chart releases,
    # whereas helm defaults the tag to the chart appVersion, so any helm
    # upgrade silently DOWNGRADES the running pods to the chart pin.
    # On 2026-06-10 a values-only apply rolled live 2026.2.4 back to
    # 2026.2.2 against a 2026.2.4-migrated DB and caused a boot storm; see
    # docs/post-mortems/2026-06-10-authentik-downgrade-boot-storm.md
    # Keep this tag in sync with what Keel has deployed whenever this chart
    # is touched; clear it only when bumping the chart version itself.
    tag: "2026.2.4"
worker:
  # Two replicas are enough: workers only run background tasks (LDAP sync,
  # email, certificate renewal) and serve no user-facing traffic, so the
  # 2-of-3 availability used for the server isn't needed. Dropping the
  # third replica saves ~100m sustained CPU.
  replicas: 2
  env:
    # Same unauthenticated_age cap as the server: the server (Django session
    # middleware) and the worker (cleanup tasks) both need to see the value.
    - name: AUTHENTIK_SESSIONS__UNAUTHENTICATED_AGE
      value: "hours=2"
    # Dramatiq worker threads per pod (default 2).
    - name: AUTHENTIK_WORKER__THREADS
      value: "4"
    # Cache TTLs stay in lockstep with server.env. CONN_MAX_AGE is
    # deliberately absent (see the server.env note): session-mode PgBouncer
    # pins persistent connections.
    - name: AUTHENTIK_CACHE__TIMEOUT_FLOWS
      value: "1800"
    - name: AUTHENTIK_CACHE__TIMEOUT_POLICIES
      value: "900"
    # SMTP, identical to server.env. The worker runs Authentik's email
    # tasks, so it needs the transport as well (tripit ADR-0020): mail is
    # sent as noreply@viktorbarzin.me through the in-cluster mailserver,
    # with the password taken from the authentik-email ExternalSecret.
    - name: AUTHENTIK_EMAIL__HOST
      value: "mailserver.mailserver.svc.cluster.local"
    - name: AUTHENTIK_EMAIL__PORT
      value: "587"
    - name: AUTHENTIK_EMAIL__USE_TLS
      value: "true"
    - name: AUTHENTIK_EMAIL__USERNAME
      value: "noreply@viktorbarzin.me"
    - name: AUTHENTIK_EMAIL__FROM
      value: "TripIt <noreply@viktorbarzin.me>"
    - name: AUTHENTIK_EMAIL__PASSWORD
      valueFrom:
        secretKeyRef:
          name: authentik-email
          key: AUTHENTIK_EMAIL__PASSWORD
  # NOTE(review): the upstream authentik chart appears to document this key
  # as `deploymentStrategy`; confirm that `strategy` is honored by the chart
  # version in use, otherwise this stanza is inert.
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 0
      maxUnavailable: 1
  resources:
    requests:
      cpu: 100m
      memory: 1.5Gi
    limits:
      memory: 2Gi
  topologySpreadConstraints:
    - maxSkew: 1
      topologyKey: kubernetes.io/hostname
      whenUnsatisfiable: ScheduleAnyway
      labelSelector:
        matchLabels:
          app.kubernetes.io/component: worker
  pdb:
    enabled: true
    maxUnavailable: 1
# The top-level postgresql toggle is off; Authentik's database connection is
# configured under authentik.postgresql (host pgbouncer.authentik).
postgresql:
  enabled: false