Revert "tripit: Gmail ingest (12-month) + vbarzin owner + plans@ forward-to-parse"

This reverts commit 4cc9229e716b6683418a148a0f896442d5ab07ad.
This commit is contained in:
Viktor Barzin 2026-06-02 23:17:18 +00:00
parent 0c7ec3d470
commit f0843e398b
6 changed files with 11 additions and 352 deletions

View file

@ -1,220 +0,0 @@
# =============================================================================
# Authentik walling-off guard
# =============================================================================
# Detects regressions where a service that MUST work WITHOUT Authentik SSO gets
# accidentally walled off — i.e. an ingress that should be `auth = "none"` (or a
# path-scoped carve-out) starts returning an Authentik forward-auth 302.
#
# The "walled off" signature (captured live 2026-06-02): a request to a
# must-stay-public URL returns 301/302 whose `Location` header points at
# Authentik:
# https://authentik.viktorbarzin.me/application/o/authorize/?client_id=...
# A correctly-carved path returns a non-redirect (200/400/401/403/404/405/426/…)
# OR a redirect whose Location is NOT Authentik (e.g. a short-link 302).
#
# Mechanism: a tiny blackbox-exporter (below) probes each guarded URL with
# `no_follow_redirects: true` and FAILS the probe iff the `Location` header
# matches Authentik (`fail_if_header_matches`). Prometheus scrapes the probe
# (job `blackbox-authentik-walloff` in extraScrapeConfigs) and the
# `AuthentikWallingOffPublicPath` PrometheusRule (alerting_rules.yml, lane=security)
# routes a firing alert to the #security Slack receiver.
#
# Chosen over a CronJob+pushgateway probe (the apex-probe pattern) because that
# pattern's `pip install`/`apk add` per-run footprint is a known disk-write
# anti-pattern that got status-page-pusher disabled (memory id=559). blackbox is
# a single long-lived deployment — zero per-run disk writes, fully declarative.
#
# ---------------------------------------------------------------------------
# TARGET LIST — HOW TO ADD A NEW CARVE-OUT (one-line edit)
# ---------------------------------------------------------------------------
# When you add a new `auth = "none"` carve-out (or path-scoped carve-out) to any
# stack, add ONE representative GET-able URL here that returns a NON-Authentik
# response today. The map key becomes the `service` label on the probe metric
# and the alert. Verify with:
# curl -s -o /dev/null -w '%{http_code} %{redirect_url}\n' '<url>'
# It must NOT 302 to authentik.viktorbarzin.me before you add it.
# ---------------------------------------------------------------------------
locals {
  # Probe target map for the Authentik walling-off guard.
  #   key   -> becomes the `service` label on the probe metric and the alert
  #   value -> one representative GET-able URL of an `auth = "none"` carve-out
  #
  # While a carve-out is intact its URL MUST answer with a non-Authentik
  # response (200 / non-Authentik 3xx / 400 / 404 / 426 / …). Targets are
  # probed every 60s and the alert fires only on a redirect to Authentik.
  authentik_walloff_targets = {
    # meshcentral agent/relay paths (auth="none"), used by native mesh-cert
    # clients. Without WebSocket upgrade headers /agent.ashx answers 404 —
    # a non-redirect, i.e. carve-out intact.
    "meshcentral-agent" = "https://meshcentral.viktorbarzin.me/agent.ashx"

    # uptime-kuma public status page (auth="none" on /status, /api/push, …).
    "uptime-status" = "https://uptime.viktorbarzin.me/status/infra"

    # shlink REST API health endpoint (auth="none"): self-gated by X-Api-Key
    # and called via CORS XHR.
    "shlink-rest-health" = "https://url.viktorbarzin.me/rest/health"

    # rybbit analytics tracker beacon (auth="none"): this JS is embedded by
    # public sites.
    "rybbit-script" = "https://rybbit.viktorbarzin.me/api/script.js"

    # insta2spotify API (auth="none"): browser fetch() XHRs plus CORS preflight.
    "insta2spotify-api-health" = "https://insta2spotify.viktorbarzin.me/api/health"

    # k8s-portal setup script (auth="none"): fetched with curl by automation
    # that carries no cookies.
    "k8s-portal-setup-script" = "https://k8s-portal.viktorbarzin.me/setup/script"

    # instagram-poster image derivative endpoint (auth="none"), hit by Meta's
    # fetcher. Without a query param /image answers 404 — a non-redirect,
    # i.e. carve-out intact.
    "instagram-poster-image" = "https://instagram-poster.viktorbarzin.me/image"

    # trading-bot app root (auth="app"): WebAuthn/JWT is handled in-app; it was
    # walled off once and now returns 200.
    "trading-bot-app" = "https://trading.viktorbarzin.me/"

    # NOTE: the openclaw task-webhook (auth="none") is intentionally NOT
    # probed — it has no public DNS record (NXDOMAIN, external_monitor=false),
    # so no externally GET-able URL exists. Its carve-out is internal-only.
  }
}
# --- blackbox-exporter -------------------------------------------------------
# Single-purpose blackbox-exporter. The `http_no_authentik_redirect` module does
# NOT follow redirects and FAILS the probe ONLY when the Location header points
# at Authentik. The status code alone must NEVER fail the probe — carve-outs
# legitimately return 404 (meshcentral /agent.ashx without WS headers,
# instagram-poster /image without a query) or 400/401/403/426, all of which mean
# "carve-out intact". So `valid_status_codes` enumerates every plausible
# non-Authentik response INCLUDING every redirect code (301/302/303/307/308) —
# a redirect is status-valid, and the Authentik case is then singled out by
# `fail_if_header_matches` on Location. The list must NOT be empty: blackbox
# treats an empty list as "2xx only", which would false-fire on every 404
# carve-out. probe_failed_due_to_regex isolates the Authentik match even further
# (it is the metric the alert expr keys on).
resource "kubernetes_config_map" "blackbox_exporter_config" {
  metadata {
    name      = "blackbox-exporter-config"
    namespace = kubernetes_namespace.monitoring.metadata[0].name
    annotations = {
      # Counterpart of the `reloader.stakater.com/search` annotation on the
      # blackbox-exporter Deployment.
      "reloader.stakater.com/match" = "true"
    }
  }

  data = {
    "blackbox.yml" = yamlencode({
      modules = {
        http_no_authentik_redirect = {
          prober  = "http"
          timeout = "10s"
          http = {
            method                = "GET"
            no_follow_redirects   = true
            preferred_ip_protocol = "ip4"
            ip_protocol_fallback  = false
            fail_if_not_ssl       = false
            valid_http_versions   = ["HTTP/1.1", "HTTP/2.0"]

            # Every non-Authentik response a carve-out may legitimately return.
            # ALL redirect codes are included (not just 301/302) so that any
            # redirect passes the status check and is judged solely by the
            # Location header match below.
            # NOTE(review): the http prober appears to evaluate header matchers
            # only after the status check has passed, so a redirect code
            # missing from this list would fail on status alone and leave
            # probe_failed_due_to_regex at 0 — i.e. an Authentik 303/307/308
            # would slip past the alert. Confirm against the blackbox_exporter
            # version in use.
            # 5xx is excluded: a backend 500 isn't a walling-off but is still
            # worth surfacing as a probe failure. The 2xx/3xx/4xx set keeps
            # probe_success==1 for all intact carve-outs (404s included).
            valid_status_codes = [
              200, 201, 202, 204,
              301, 302, 303, 304, 307, 308,
              400, 401, 403, 404, 405, 409, 410, 426, 429,
            ]

            # FAIL the probe if the response redirects to Authentik. This is the
            # walling-off signature: forward-auth redirect -> /application/o/authorize
            # on authentik.viktorbarzin.me (also matches /outpost.goauthentik.io).
            fail_if_header_matches = [
              {
                header = "Location"
                regexp = "(authentik\\.viktorbarzin\\.me|/outpost\\.goauthentik\\.io|/application/o/authorize)"
                # Non-redirect responses carry no Location header; that must
                # not count as a failure.
                allow_missing = true
              },
            ]
          }
        }
      }
    })
  }
}
# Long-lived, single-replica blackbox-exporter Deployment. It serves probe
# requests on port 9115 and reads its module config from the
# blackbox-exporter-config ConfigMap mounted at /etc/blackbox_exporter/.
resource "kubernetes_deployment" "blackbox_exporter" {
  metadata {
    name      = "blackbox-exporter"
    namespace = kubernetes_namespace.monitoring.metadata[0].name
    labels = {
      app  = "blackbox-exporter"
      tier = var.tier
    }
    annotations = {
      # Counterpart of the `reloader.stakater.com/match` annotation on the
      # config ConfigMap.
      "reloader.stakater.com/search" = "true"
    }
  }

  spec {
    replicas = 1

    selector {
      match_labels = {
        app = "blackbox-exporter"
      }
    }

    template {
      metadata {
        labels = {
          app = "blackbox-exporter"
        }
      }

      spec {
        container {
          name  = "blackbox-exporter"
          image = "prom/blackbox-exporter:v0.25.0"
          args  = ["--config.file=/etc/blackbox_exporter/blackbox.yml"]

          port {
            container_port = 9115
            name           = "http"
          }

          # Health probes: without them a wedged exporter is never restarted
          # and never removed from the Service endpoints, so the walling-off
          # guard would go silently blind (its alert needs fresh
          # probe_failed_due_to_regex samples to fire).
          readiness_probe {
            http_get {
              path = "/-/healthy"
              port = "http"
            }
            period_seconds    = 10
            timeout_seconds   = 3
            failure_threshold = 3
          }

          liveness_probe {
            http_get {
              path = "/-/healthy"
              port = "http"
            }
            initial_delay_seconds = 10
            period_seconds        = 30
            timeout_seconds       = 5
            failure_threshold     = 3
          }

          resources {
            requests = {
              cpu    = "5m"
              memory = "24Mi"
            }
            limits = {
              memory = "48Mi"
            }
          }

          volume_mount {
            name       = "config-volume"
            mount_path = "/etc/blackbox_exporter/"
          }
        }

        volume {
          name = "config-volume"
          config_map {
            name = kubernetes_config_map.blackbox_exporter_config.metadata[0].name
          }
        }

        dns_config {
          option {
            name  = "ndots"
            value = "2"
          }
        }
      }
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
    # KEEL: monitoring ns is keel-enrolled (policy=patch) — Keel owns the image
    # tag and injects keel.sh annotations. Ignore so TF stops reverting Keel.
    ignore_changes = [
      spec[0].template[0].spec[0].dns_config,
      spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE
      metadata[0].annotations["keel.sh/policy"],
      metadata[0].annotations["keel.sh/trigger"],
      metadata[0].annotations["keel.sh/pollSchedule"],
      metadata[0].annotations["keel.sh/match-tag"],
      spec[0].template[0].metadata[0].annotations["keel.sh/update-time"], # KEEL_LIFECYCLE_V1
    ]
  }
}
# Service fronting the blackbox-exporter pods (selected by the
# app=blackbox-exporter label) on TCP 9115. No `type` is set, so the
# Kubernetes default service type applies.
resource "kubernetes_service" "blackbox_exporter" {
  metadata {
    namespace = kubernetes_namespace.monitoring.metadata[0].name
    name      = "blackbox-exporter"

    labels = {
      app = "blackbox-exporter"
    }
  }

  spec {
    # Service port 9115 forwards to port 9115 on the selected pods.
    port {
      name        = "http"
      target_port = 9115
      port        = 9115
    }

    selector = {
      app = "blackbox-exporter"
    }
  }
}

View file

@ -60,5 +60,5 @@ resource "helm_release" "prometheus" {
# Re-enable temporarily only when a StatefulSet volumeClaimTemplate change needs --force.
force_update = false
values = [templatefile("${path.module}/prometheus_chart_values.tpl", { alertmanager_mail_pass = var.alertmanager_account_password, alertmanager_slack_api_url = var.alertmanager_slack_api_url, tuya_api_key = var.tiny_tuya_service_secret, haos_api_token = var.haos_api_token, authentik_walloff_targets = local.authentik_walloff_targets })]
values = [templatefile("${path.module}/prometheus_chart_values.tpl", { alertmanager_mail_pass = var.alertmanager_account_password, alertmanager_slack_api_url = var.alertmanager_slack_api_url, tuya_api_key = var.tiny_tuya_service_secret, haos_api_token = var.haos_api_token })]
}

View file

@ -99,14 +99,7 @@ alertmanager:
- source_matchers:
- alertname = TraefikDown
target_matchers:
- alertname =~ "PoisonFountainDown|ForwardAuthFallbackActive|AuthentikWallingOffPublicPath"
# Authentik down: every protected ingress behaves oddly (fallback proxies
# engage). The walling-off probe failing then is a symptom, not a regressed
# carve-out — suppress it so the root-cause AuthentikDown alert stands alone.
- source_matchers:
- alertname = AuthentikDown
target_matchers:
- alertname = AuthentikWallingOffPublicPath
- alertname =~ "PoisonFountainDown|ForwardAuthFallbackActive"
# A stale Traefik replica returns 404 for a fraction of requests; the same
# bug surfaces as TTFB / 4xx / 5xx / external-divergence symptoms downstream.
# When TraefikReplicaConfigStale fires, the root cause is identified —
@ -2949,59 +2942,8 @@ serverFiles:
subsystem: traefik
annotations:
summary: "Traefik replicas have diverging router counts (skew={{ $value | printf \"%.0f\" }}). Restart the laggard pod: `kubectl get pods -n traefik` and delete the one with fewer routers."
# Authentik walling-off guard. Fires when a must-stay-public carve-out URL
# (job blackbox-authentik-walloff, targets in authentik_walloff_probe.tf)
# starts returning an Authentik forward-auth 302. probe_success==0 there
# means blackbox's fail_if_header_matches caught a Location -> Authentik:
# a path-scoped `auth = "none"` carve-out was clobbered (TF revert, deploy,
# ingress_factory default flipping back to auth="required"). lane=security
# routes it to the #security Slack receiver (Slack-only, no paging).
- name: Authentik Walling Off
rules:
- alert: AuthentikWallingOffPublicPath
# probe_failed_due_to_regex==1 means the response's Location header
# matched Authentik — the precise walling-off signature, independent
# of status code. (We deliberately do NOT alert on bare
# probe_success==0: with the broad valid_status_codes, a 404 carve-out
# is success, and a 5xx/DNS/TLS failure is a DIFFERENT failure mode
# already covered by reachability alerts — not a forward-auth wall.)
# for:10m rides out scrape blips / brief Traefik restarts.
expr: probe_failed_due_to_regex{job="blackbox-authentik-walloff"} == 1
for: 10m
labels:
severity: warning
lane: security
subsystem: authentik
annotations:
summary: "Public path walled off by Authentik: {{ $labels.service }} ({{ $labels.instance }})"
description: "The must-stay-public URL {{ $labels.instance }} (carve-out `{{ $labels.service }}`) is failing its blackbox probe — most likely it now 302-redirects to Authentik SSO. A path-scoped `auth = \"none\"` carve-out probably regressed (TF revert / deploy / ingress_factory auth default flipping back to \"required\"). Native-client / public / webhook / WebSocket / SPA-XHR traffic to this endpoint is broken for strangers and machines. Check the owning stack's ingress_factory `auth` + `ingress_path`, and curl the URL: `curl -sI '{{ $labels.instance }}'` — a Location to authentik.viktorbarzin.me confirms the regression. Probe config + target list: stacks/monitoring/modules/monitoring/authentik_walloff_probe.tf."
extraScrapeConfigs: |
# Authentik walling-off guard. Probes each must-stay-public carve-out URL via
# blackbox-exporter's `http_no_authentik_redirect` module (no_follow_redirects +
# fail_if_header_matches on a Location -> Authentik). probe_success == 0 for a
# target here means that URL now 302s to Authentik — a carve-out regressed.
# Target list + "how to add a target" docs: authentik_walloff_probe.tf.
# Alert: AuthentikWallingOffPublicPath (alerting_rules.yml, lane=security).
- job_name: 'blackbox-authentik-walloff'
scrape_interval: 1m
scrape_timeout: 30s
metrics_path: /probe
params:
module: [http_no_authentik_redirect]
static_configs:
%{ for svc, url in authentik_walloff_targets ~}
- targets: ["${url}"]
labels:
service: "${svc}"
%{ endfor ~}
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
- source_labels: [__param_target]
target_label: instance
- target_label: __address__
replacement: 'blackbox-exporter.monitoring.svc.cluster.local:9115'
# The `mailserver-dovecot` scrape job was retired in code-1ik together
# with the Dovecot exporter. docker-mailserver 15.0.0's Dovecot 2.3
# doesn't emit the old_stats protocol the exporter expected, so the