authentik: speed up first-time signin (single-screen login, live env tuning, asset caching, outpost+nginx hot path)
Viktor asked to review Authentik and the web tier and make first-time signin to apps faster. The review found that the slowness comes from extra screens and round trips, not from server time.

Changes:

- values.yaml: the authentik.* Helm values (gunicorn workers, cache timeouts, conn_max_age) were silently INERT because existingSecret skips chart env rendering — pods ran the defaults (2 workers, 300s caches, no persistent DB conns). Moved all tuning into server.env/worker.env, which actually reaches the pods.
- authentik_provider.tf: adopt the identification stage and pin password_stage so username+password render on ONE screen (the separate order-20 password binding is deleted via the API — authentik requires that when embedding). Outpost log_level trace->info and replicas 1->2 (it is on the hot path of every forward-auth request; PG-backed sessions make 2 replicas safe).
- authentik module: /static ingress carve-out with immutable Cache-Control (assets are version-fingerprinted but were served with no max-age — internal split-horizon users got zero caching).
- traefik auth-proxy nginx: upstream keepalive 32 + HTTP/1.1 (it was opening a fresh TCP connection to the outpost per subrequest) + config-checksum annotation so config changes roll the pods.
- docs: authentication.md + authentik-state.md updated; fixed the stale 'postgresql.dbaas has no endpoints' claim in CLAUDE.md/CONTEXT.md (it is a live CNPG primary-selector compatibility service).

Done via the API in the same change (UI-managed objects): 6 OIDC providers (Vault, Forgejo, Immich, Headscale, linkwarden, Cloudflare Access) switched from explicit to implicit consent — all are first-party, so the 4-weekly consent screen only slowed first-time signin.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
parent
93ba67c84a
commit
97ccdbecb8
8 changed files with 232 additions and 55 deletions
|
|
@ -91,14 +91,21 @@ resource "authentik_outpost" "embedded" {
|
|||
protocol_providers = [authentik_provider_proxy.catchall.id]
|
||||
service_connection = "99e227a7-4562-4888-9660-4c27da678c50"
|
||||
config = jsonencode({
|
||||
log_level = "trace"
|
||||
docker_labels = null
|
||||
authentik_host = "https://authentik.viktorbarzin.me/"
|
||||
docker_network = null
|
||||
container_image = null
|
||||
docker_map_ports = true
|
||||
refresh_interval = "minutes=5"
|
||||
kubernetes_replicas = 1
|
||||
# info, not trace: the outpost sits on the hot path of every request to
|
||||
# every auth="required" ingress — trace logging is per-request overhead
|
||||
# with no operational value (request access lines are emitted at info).
|
||||
log_level = "info"
|
||||
docker_labels = null
|
||||
authentik_host = "https://authentik.viktorbarzin.me/"
|
||||
docker_network = null
|
||||
container_image = null
|
||||
docker_map_ports = true
|
||||
refresh_interval = "minutes=5"
|
||||
# 2 replicas: removes the single-pod hot path for all forward-auth
|
||||
# subrequests. Safe since sessions moved to the shared Postgres backend
|
||||
# (authentik_providers_proxy_proxysession, 2026-05-10) — no pod-local
|
||||
# session state anymore.
|
||||
kubernetes_replicas = 2
|
||||
kubernetes_namespace = "authentik"
|
||||
authentik_host_browser = ""
|
||||
object_naming_template = "ak-outpost-%(name)s"
|
||||
|
|
@ -198,3 +205,46 @@ resource "authentik_stage_user_login" "default_login" {
|
|||
]
|
||||
}
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Default Identification stage — adopted 2026-06-10 to embed the password
|
||||
# field on the identification screen (single-screen login: one round trip and
|
||||
# one screen instead of two). Per authentik docs, when an Identification stage
|
||||
# carries a password stage the Password stage must NOT be bound separately —
|
||||
# the redundant order-20 binding on default-authentication-flow (pk
|
||||
# 0fc677db-a23f-4ee7-8648-da342e14573b) was deleted via the API in the same
|
||||
# change. Social-login users are unaffected: source buttons stay on the same
|
||||
# screen and bypass the password field.
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
import {
|
||||
to = authentik_stage_identification.default_identification
|
||||
id = "32aca5ab-106e-43f4-a4cc-4513d80e57f3"
|
||||
}
|
||||
|
||||
data "authentik_stage" "default_authentication_password" {
|
||||
name = "default-authentication-password"
|
||||
}
|
||||
|
||||
resource "authentik_stage_identification" "default_identification" {
|
||||
name = "default-authentication-identification"
|
||||
password_stage = data.authentik_stage.default_authentication_password.id
|
||||
lifecycle {
|
||||
# Pin only password_stage; everything else stays UI-managed (same pattern
|
||||
# as authentik_stage_user_login.default_login above).
|
||||
ignore_changes = [
|
||||
user_fields,
|
||||
case_insensitive_matching,
|
||||
show_matched_user,
|
||||
show_source_labels,
|
||||
sources,
|
||||
enrollment_flow,
|
||||
recovery_flow,
|
||||
passwordless_flow,
|
||||
pretend_user_exists,
|
||||
captcha_stage,
|
||||
webauthn_stage,
|
||||
enable_remember_me,
|
||||
]
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ resource "kubernetes_namespace" "authentik" {
|
|||
labels = {
|
||||
tier = var.tier
|
||||
"resource-governance/custom-quota" = "true"
|
||||
"keel.sh/enrolled" = "true"
|
||||
"keel.sh/enrolled" = "true"
|
||||
}
|
||||
}
|
||||
lifecycle {
|
||||
|
|
@ -111,3 +111,44 @@ module "ingress-outpost" {
|
|||
anti_ai_scraping = false
|
||||
exclude_crowdsec = true
|
||||
}
|
||||
|
||||
# Immutable caching for the flow-executor static assets. Authentik serves
|
||||
# /static/dist/* with version-fingerprinted filenames (e.g. poly-2026.2.4.js)
|
||||
# but no max-age, so browsers re-validate the login JS bundle on every signin
|
||||
# — and split-horizon internal users (direct to Traefik, no Cloudflare) get no
|
||||
# edge cache at all. Long-lived immutable caching is safe: every authentik
|
||||
# upgrade changes the asset URLs.
|
||||
resource "kubernetes_manifest" "static_cache_headers" {
|
||||
manifest = {
|
||||
apiVersion = "traefik.io/v1alpha1"
|
||||
kind = "Middleware"
|
||||
metadata = {
|
||||
name = "static-cache-headers"
|
||||
namespace = kubernetes_namespace.authentik.metadata[0].name
|
||||
}
|
||||
spec = {
|
||||
headers = {
|
||||
customResponseHeaders = {
|
||||
"Cache-Control" = "public, max-age=31536000, immutable"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
module "ingress-static" {
|
||||
source = "../../../../modules/kubernetes/ingress_factory"
|
||||
# Same-host path carve-out of the public authentik UI ingress above, only
|
||||
# adding the cache-headers middleware for the static asset prefix.
|
||||
# auth = "none": versioned static assets of the (already public) Authentik login UI.
|
||||
auth = "none"
|
||||
namespace = kubernetes_namespace.authentik.metadata[0].name
|
||||
name = "authentik-static"
|
||||
host = "authentik"
|
||||
service_name = "goauthentik-server"
|
||||
ingress_path = ["/static"]
|
||||
tls_secret_name = var.tls_secret_name
|
||||
anti_ai_scraping = false
|
||||
homepage_enabled = false
|
||||
extra_middlewares = ["authentik-static-cache-headers@kubernetescrd"]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,10 @@
|
|||
authentik:
|
||||
# NOTE: because we set existingSecret below, the chart does NOT render the
|
||||
# authentik.* values into an AUTHENTIK_* env Secret — the live env comes
|
||||
# from the orphaned, helm-keep-policy `goauthentik` Secret created by chart
|
||||
# 2025.10.3. Anything under authentik.* here is effectively INERT. All new
|
||||
# or tuned config MUST go through server.env / worker.env instead (see
|
||||
# .claude/reference/authentik-state.md).
|
||||
log_level: warning
|
||||
# log_level: trace
|
||||
secret_key: ""
|
||||
|
|
@ -14,38 +20,40 @@ authentik:
|
|||
port: 6432
|
||||
user: authentik
|
||||
password: ""
|
||||
# Persistent client-side connections (safe with PgBouncer session mode;
|
||||
# must be < pgbouncer server_idle_timeout=600s). Cuts Django connection
|
||||
# setup overhead off the ~70 sequential ORM ops per flow stage.
|
||||
conn_max_age: 60
|
||||
conn_health_checks: true
|
||||
cache:
|
||||
# Cache flow plans for 30m and policy evaluations for 15m. Authentik 2026.2
|
||||
# moved cache storage from Redis to Postgres, so a TTL hit is still a
|
||||
# SELECT — but a single indexed lookup beats re-evaluating PolicyBindings.
|
||||
timeout_flows: 1800
|
||||
timeout_policies: 900
|
||||
web:
|
||||
# Gunicorn: 3 workers × 4 threads per server pod (default 2×4).
|
||||
# Pairs with the server memory bump to 2Gi (each worker preloads Django ~500Mi).
|
||||
workers: 3
|
||||
threads: 4
|
||||
worker:
|
||||
# Celery-equivalent worker threads per pod (default 2, renamed from
|
||||
# AUTHENTIK_WORKER__CONCURRENCY in 2025.8).
|
||||
threads: 4
|
||||
|
||||
server:
|
||||
replicas: 3
|
||||
# Anonymous Django sessions (no completed login: bots, healthcheckers,
|
||||
# partial flows) expire in 2h. Default is days=1. Once login completes,
|
||||
# UserLoginStage.session_duration takes over via request.session.set_expiry.
|
||||
# Injected via server.env (not authentik.sessions.*) because we use
|
||||
# authentik.existingSecret.secretName, which makes the chart skip
|
||||
# rendering the AUTHENTIK_* secret — so the values block doesn't reach env.
|
||||
env:
|
||||
# Anonymous Django sessions (no completed login: bots, healthcheckers,
|
||||
# partial flows) expire in 2h. Default is days=1. Once login completes,
|
||||
# UserLoginStage.session_duration takes over via request.session.set_expiry.
|
||||
# Injected via server.env (not authentik.sessions.*) because we use
|
||||
# authentik.existingSecret.secretName, which makes the chart skip
|
||||
# rendering the AUTHENTIK_* secret — so the values block doesn't reach env.
|
||||
- name: AUTHENTIK_SESSIONS__UNAUTHENTICATED_AGE
|
||||
value: "hours=2"
|
||||
# Gunicorn: 3 workers × 4 threads per server pod (defaults 2×4).
|
||||
# Pairs with the server memory limit of 2Gi (each worker preloads
|
||||
# Django ~500Mi).
|
||||
- name: AUTHENTIK_WEB__WORKERS
|
||||
value: "3"
|
||||
- name: AUTHENTIK_WEB__THREADS
|
||||
value: "4"
|
||||
# Cache flow plans for 30m and policy evaluations for 15m (defaults 300s).
|
||||
# Authentik 2026.2 stores cache in Postgres, so a TTL hit is still a
|
||||
# SELECT — but a single indexed lookup beats re-planning the flow
|
||||
# (~70 sequential ORM ops per flow stage POST).
|
||||
- name: AUTHENTIK_CACHE__TIMEOUT_FLOWS
|
||||
value: "1800"
|
||||
- name: AUTHENTIK_CACHE__TIMEOUT_POLICIES
|
||||
value: "900"
|
||||
# Persistent client-side DB connections (safe with PgBouncer session mode;
|
||||
# must stay < pgbouncer server_idle_timeout=600s). Cuts per-request Django
|
||||
# connection setup off the auth hot path.
|
||||
- name: AUTHENTIK_POSTGRESQL__CONN_MAX_AGE
|
||||
value: "60"
|
||||
- name: AUTHENTIK_POSTGRESQL__CONN_HEALTH_CHECKS
|
||||
value: "true"
|
||||
strategy:
|
||||
type: RollingUpdate
|
||||
rollingUpdate:
|
||||
|
|
@ -82,11 +90,23 @@ worker:
|
|||
# certificate renewal) — no user-facing traffic, so 2-of-3 isn't
|
||||
# needed for availability. Drop saves ~100m sustained CPU.
|
||||
replicas: 2
|
||||
# Same unauthenticated_age cap as server — both the server (Django session
|
||||
# middleware) and worker (cleanup tasks) need to see the value.
|
||||
env:
|
||||
# Same unauthenticated_age cap as server — both the server (Django session
|
||||
# middleware) and worker (cleanup tasks) need to see the value.
|
||||
- name: AUTHENTIK_SESSIONS__UNAUTHENTICATED_AGE
|
||||
value: "hours=2"
|
||||
# Dramatiq worker threads per pod (default 2).
|
||||
- name: AUTHENTIK_WORKER__THREADS
|
||||
value: "4"
|
||||
# Keep cache + DB-connection settings in lockstep with server.env.
|
||||
- name: AUTHENTIK_CACHE__TIMEOUT_FLOWS
|
||||
value: "1800"
|
||||
- name: AUTHENTIK_CACHE__TIMEOUT_POLICIES
|
||||
value: "900"
|
||||
- name: AUTHENTIK_POSTGRESQL__CONN_MAX_AGE
|
||||
value: "60"
|
||||
- name: AUTHENTIK_POSTGRESQL__CONN_HEALTH_CHECKS
|
||||
value: "true"
|
||||
strategy:
|
||||
type: RollingUpdate
|
||||
rollingUpdate:
|
||||
|
|
|
|||
|
|
@ -720,6 +720,11 @@ resource "kubernetes_config_map" "auth_proxy_config" {
|
|||
"default.conf" = <<-EOT
|
||||
upstream authentik {
|
||||
server ak-outpost-authentik-embedded-outpost.authentik.svc.cluster.local:9000;
|
||||
# Reuse connections to the outpost. Without this every forward-auth
|
||||
# subrequest (= every request to every auth="required" ingress) opens
|
||||
# a fresh TCP connection. Requires HTTP/1.1 + cleared Connection
|
||||
# header on the proxy_pass locations below.
|
||||
keepalive 32;
|
||||
}
|
||||
server {
|
||||
listen 9000;
|
||||
|
|
@ -734,6 +739,8 @@ resource "kubernetes_config_map" "auth_proxy_config" {
|
|||
|
||||
location /outpost.goauthentik.io/auth/traefik {
|
||||
proxy_pass http://authentik;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Connection "";
|
||||
proxy_connect_timeout 3s;
|
||||
proxy_read_timeout 5s;
|
||||
proxy_send_timeout 5s;
|
||||
|
|
@ -764,6 +771,8 @@ resource "kubernetes_config_map" "auth_proxy_config" {
|
|||
|
||||
location /outpost.goauthentik.io/ {
|
||||
proxy_pass http://authentik;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Connection "";
|
||||
proxy_connect_timeout 3s;
|
||||
proxy_read_timeout 10s;
|
||||
proxy_set_header Host $host;
|
||||
|
|
@ -820,6 +829,11 @@ resource "kubernetes_deployment" "auth_proxy" {
|
|||
labels = {
|
||||
app = "auth-proxy"
|
||||
}
|
||||
annotations = {
|
||||
# nginx only reads its config at startup — roll the pods whenever
|
||||
# the ConfigMap content changes.
|
||||
"checksum/auth-proxy-config" = sha1(kubernetes_config_map.auth_proxy_config.data["default.conf"])
|
||||
}
|
||||
}
|
||||
spec {
|
||||
topology_spread_constraint {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue