fix: restore tree dropped by 6d224861; land stem95su gdrive-sync (10m) [ci skip]
6d224861 came from a --no-checkout worktree whose empty index made the
commit drop every file except two. This restores 05b50d2b's full tree and
correctly adds stacks/stem95su/gdrive-sync.tf + the service-catalog stem95su
entry. Forward-only (parent=6d224861, no force-push); [ci skip] since the
live infra was never applied from the broken commit.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
6d224861c4
commit
fd0f4a0365
1166 changed files with 358546 additions and 0 deletions
474
modules/kubernetes/anubis_instance/main.tf
Normal file
474
modules/kubernetes/anubis_instance/main.tf
Normal file
|
|
@ -0,0 +1,474 @@
|
|||
terraform {
|
||||
required_providers {
|
||||
kubernetes = {
|
||||
source = "hashicorp/kubernetes"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Per-site Anubis reverse proxy.
|
||||
# Sits between Traefik and the real backend. On first visit, serves a
|
||||
# proof-of-work challenge; on success, drops a long-lived JWT cookie and
|
||||
# proxies the request through to `target_url`.
|
||||
#
|
||||
# Sharing a single ed25519 signing key across instances + COOKIE_DOMAIN at
|
||||
# the registrable domain means a token solved on one viktorbarzin.me subdomain
|
||||
# is honoured by every other Anubis-fronted site.
|
||||
|
||||
variable "name" {
|
||||
type = string
|
||||
description = "Short logical name (e.g. \"blog\"). Used to derive Service / Deployment / Secret names as anubis-<name>."
|
||||
}
|
||||
|
||||
variable "namespace" {
|
||||
type = string
|
||||
description = "Namespace to deploy into — typically the same as the protected backend service."
|
||||
}
|
||||
|
||||
variable "target_url" {
|
||||
type = string
|
||||
description = "Backend URL Anubis forwards passing requests to (e.g. http://blog.website.svc.cluster.local)."
|
||||
}
|
||||
|
||||
variable "cookie_domain" {
|
||||
type = string
|
||||
default = "viktorbarzin.me"
|
||||
description = "Cookie domain — set to the registrable domain so a single PoW solve covers every Anubis-fronted subdomain."
|
||||
}
|
||||
|
||||
variable "difficulty" {
|
||||
type = number
|
||||
default = 2
|
||||
description = "PoW difficulty (leading-zero hex chars). 2 = ~250ms desktop / ~700ms mobile. Bump for stronger filtering."
|
||||
}
|
||||
|
||||
variable "cookie_expiration_hours" {
|
||||
type = number
|
||||
default = 720 # 30 days
|
||||
description = "Lifetime of the issued JWT cookie in hours."
|
||||
}
|
||||
|
||||
variable "image_tag" {
|
||||
type = string
|
||||
default = "v1.25.0"
|
||||
description = "ghcr.io/techarohq/anubis tag — pin to a release, never :latest."
|
||||
}
|
||||
|
||||
variable "replicas" {
|
||||
type = number
|
||||
default = null
|
||||
description = "Optional replica count override. When null, defaults to 1 if shared_store_url is null and 2 otherwise. Capped at 2 — Redis can handle more but anti-affinity assumes ≤2 replicas per Anubis instance on a 5-node cluster."
|
||||
|
||||
validation {
|
||||
condition = var.replicas == null ? true : (var.replicas >= 1 && var.replicas <= 2)
|
||||
error_message = "replicas must be 1 or 2 (or null to auto-pick from shared_store_url presence)."
|
||||
}
|
||||
}
|
||||
|
||||
variable "shared_store_url" {
|
||||
type = string
|
||||
default = null
|
||||
description = "If set, Anubis stores in-flight challenge state in this Valkey/Redis-protocol URL instead of in-process memory, enabling HA across replicas. Format: redis://host:port/<db-index>. The DB index MUST be unique per Anubis instance (this module assumes 16 DBs available, common in standalone Redis). Cluster Redis is redis-master.redis.svc.cluster.local:6379 with HA via Sentinel + haproxy. Without this, replicas>1 causes ~50% PoW failures (challenge issued by pod A, solved against pod B → 500)."
|
||||
|
||||
validation {
|
||||
condition = var.shared_store_url == null || can(regex("^redis://[a-zA-Z0-9_.-]+:[0-9]+/[0-9]+$", var.shared_store_url))
|
||||
error_message = "shared_store_url must look like redis://host:port/<db-index> (explicit DB index required)."
|
||||
}
|
||||
}
|
||||
|
||||
variable "memory" {
|
||||
type = string
|
||||
default = "128Mi"
|
||||
description = "requests==limits memory. Anubis docs suggest 128Mi handles many concurrent clients."
|
||||
}
|
||||
|
||||
variable "policy_yaml" {
|
||||
type = string
|
||||
default = null
|
||||
description = "Override the strict default bot-policy YAML. Leave null to use the catch-all CHALLENGE policy."
|
||||
}
|
||||
|
||||
variable "cpu_request" {
|
||||
type = string
|
||||
default = "20m"
|
||||
description = "CPU request. PoW verification is server-cheap (just hash check)."
|
||||
}
|
||||
|
||||
locals {
|
||||
full_name = "anubis-${var.name}"
|
||||
labels = {
|
||||
"app" = local.full_name
|
||||
"app.kubernetes.io/name" = "anubis"
|
||||
"app.kubernetes.io/instance" = local.full_name
|
||||
"app.kubernetes.io/component" = "ai-bot-challenge"
|
||||
"app.kubernetes.io/managed-by" = "terraform"
|
||||
}
|
||||
|
||||
# Effective replicas: caller-override > shared-store-aware default.
|
||||
effective_replicas = coalesce(var.replicas, var.shared_store_url == null ? 1 : 2)
|
||||
|
||||
# Anubis store config. With backend=valkey, multiple Anubis pods can share
|
||||
# in-flight PoW state and a challenge issued by pod A is verifiable by pod
|
||||
# B. Default backend is in-process memory which only works at replicas=1.
|
||||
store_yaml_block = var.shared_store_url == null ? "" : <<-EOT
|
||||
|
||||
|
||||
store:
|
||||
backend: valkey
|
||||
parameters:
|
||||
url: "${var.shared_store_url}"
|
||||
EOT
|
||||
|
||||
# Strict bot policy. Default Anubis policy only WEIGHs Mozilla|Opera UAs
|
||||
# and lets unmatched UAs (curl, wget, Python-requests, scrapy, headless
|
||||
# CLI scrapers) fall through to ALLOW. We import the same upstream
|
||||
# snippets and append a catch-all CHALLENGE so anyone without JS+PoW
|
||||
# capability is filtered.
|
||||
default_policy_yaml = <<-EOT
|
||||
bots:
|
||||
# Hard-deny known-bad bots first — runs before the method bypass so
|
||||
# a declared bad bot can't sneak through by sending a POST.
|
||||
- import: (data)/bots/_deny-pathological.yaml
|
||||
- import: (data)/bots/aggressive-brazilian-scrapers.yaml
|
||||
# Hard-deny declared AI/LLM crawlers (ClaudeBot, GPTBot, Bytespider, …).
|
||||
- import: (data)/meta/ai-block-aggressive.yaml
|
||||
# Whitelist legitimate search-engine crawlers (Googlebot, Bingbot, …).
|
||||
- import: (data)/crawlers/_allow-good.yaml
|
||||
# Challenge Firefox AI previews specifically.
|
||||
- import: (data)/clients/x-firefox-ai.yaml
|
||||
# Allow /.well-known, /robots.txt, /favicon.*, /sitemap.xml — keeps
|
||||
# the internet working for benign crawlers and discovery clients.
|
||||
- import: (data)/common/keep-internet-working.yaml
|
||||
# Allow every non-GET request through. Rationale: AI scrapers steal
|
||||
# the body of GETs (page content) — they don't POST. State-mutating
|
||||
# methods come from app XHRs (PrivateBin paste creation, Komga
|
||||
# uploads, SPA actions) and CORS preflight (OPTIONS). Challenging
|
||||
# those breaks the app, because the JS expects JSON and gets the
|
||||
# Anubis HTML challenge page. CrowdSec + rate-limit + per-app auth
|
||||
# already cover abuse on these methods.
|
||||
- name: allow-non-get-methods
|
||||
action: ALLOW
|
||||
expression: method != "GET"
|
||||
# Catch-all: every remaining (GET) request must solve the challenge.
|
||||
# This closes the "unmatched UA falls through to ALLOW" gap that
|
||||
# lets curl/wget/Python-requests scrape non-CDN-fronted hosts.
|
||||
- name: catchall-challenge
|
||||
path_regex: .*
|
||||
action: CHALLENGE
|
||||
EOT
|
||||
|
||||
# Final policy YAML: defaults (or caller override) plus an optional store
|
||||
# block when shared_store_url is set. Store block is module-managed and
|
||||
# appended universally — callers passing a custom policy_yaml shouldn't
|
||||
# include their own `store:` block (they would collide).
|
||||
rendered_policy_yaml = "${coalesce(var.policy_yaml, local.default_policy_yaml)}${local.store_yaml_block}"
|
||||
}
|
||||
|
||||
# Bot policy ConfigMap. Mounted into the pod and referenced by POLICY_FNAME.
|
||||
resource "kubernetes_config_map" "policy" {
|
||||
metadata {
|
||||
name = "${local.full_name}-policy"
|
||||
namespace = var.namespace
|
||||
labels = local.labels
|
||||
}
|
||||
data = {
|
||||
"botPolicies.yaml" = local.rendered_policy_yaml
|
||||
}
|
||||
}
|
||||
|
||||
# ED25519 signing key — pulled from Vault `secret/viktor` -> field
|
||||
# `anubis_ed25519_key`. Same key across every instance so JWTs are
|
||||
# cross-validatable, enabling cross-subdomain SSO.
|
||||
resource "kubernetes_manifest" "ed25519_secret" {
|
||||
manifest = {
|
||||
apiVersion = "external-secrets.io/v1beta1"
|
||||
kind = "ExternalSecret"
|
||||
metadata = {
|
||||
name = "${local.full_name}-key"
|
||||
namespace = var.namespace
|
||||
}
|
||||
spec = {
|
||||
refreshInterval = "1h"
|
||||
secretStoreRef = {
|
||||
name = "vault-kv"
|
||||
kind = "ClusterSecretStore"
|
||||
}
|
||||
target = {
|
||||
name = "${local.full_name}-key"
|
||||
creationPolicy = "Owner"
|
||||
}
|
||||
data = [{
|
||||
secretKey = "key"
|
||||
remoteRef = {
|
||||
key = "viktor"
|
||||
property = "anubis_ed25519_key"
|
||||
}
|
||||
}]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
resource "kubernetes_deployment" "anubis" {
|
||||
metadata {
|
||||
name = local.full_name
|
||||
namespace = var.namespace
|
||||
labels = local.labels
|
||||
}
|
||||
|
||||
spec {
|
||||
replicas = local.effective_replicas
|
||||
|
||||
selector {
|
||||
match_labels = { app = local.full_name }
|
||||
}
|
||||
|
||||
strategy {
|
||||
type = "RollingUpdate"
|
||||
rolling_update {
|
||||
max_surge = 1
|
||||
max_unavailable = 0
|
||||
}
|
||||
}
|
||||
|
||||
template {
|
||||
metadata {
|
||||
labels = local.labels
|
||||
annotations = {
|
||||
# Roll the deployment whenever the policy YAML changes — Anubis
|
||||
# reads the policy at startup, so a ConfigMap update alone
|
||||
# doesn't take effect until pods restart.
|
||||
"checksum/policy" = sha256(local.rendered_policy_yaml)
|
||||
}
|
||||
}
|
||||
|
||||
spec {
|
||||
# Spread replicas across nodes to survive a single node failure.
|
||||
# DoNotSchedule (not ScheduleAnyway) so 2 replicas are forced onto
|
||||
# different hosts — otherwise the scheduler may pile them on the
|
||||
# same node and a single node reboot takes the whole Anubis instance
|
||||
# down despite replicas=2. On a 5-node cluster the spread is always
|
||||
# satisfiable; the worst case (4 nodes unavailable) leaves one
|
||||
# replica Pending, but the other keeps serving.
|
||||
topology_spread_constraint {
|
||||
max_skew = 1
|
||||
topology_key = "kubernetes.io/hostname"
|
||||
when_unsatisfiable = "DoNotSchedule"
|
||||
label_selector {
|
||||
match_labels = { app = local.full_name }
|
||||
}
|
||||
}
|
||||
|
||||
container {
|
||||
name = "anubis"
|
||||
image = "ghcr.io/techarohq/anubis:${var.image_tag}"
|
||||
|
||||
port {
|
||||
name = "http"
|
||||
container_port = 8923
|
||||
}
|
||||
port {
|
||||
name = "metrics"
|
||||
container_port = 9090
|
||||
}
|
||||
|
||||
env {
|
||||
name = "BIND"
|
||||
value = ":8923"
|
||||
}
|
||||
env {
|
||||
name = "METRICS_BIND"
|
||||
value = ":9090"
|
||||
}
|
||||
env {
|
||||
name = "TARGET"
|
||||
value = var.target_url
|
||||
}
|
||||
env {
|
||||
name = "DIFFICULTY"
|
||||
value = tostring(var.difficulty)
|
||||
}
|
||||
env {
|
||||
name = "COOKIE_EXPIRATION_TIME"
|
||||
value = "${var.cookie_expiration_hours}h"
|
||||
}
|
||||
# Cross-subdomain SSO: cookie scoped to the registrable domain so
|
||||
# a JWT solved on any Anubis-fronted subdomain is honoured on every
|
||||
# other one. (COOKIE_DOMAIN and COOKIE_DYNAMIC_DOMAIN are mutually
|
||||
# exclusive — picking the explicit form.)
|
||||
env {
|
||||
name = "COOKIE_DOMAIN"
|
||||
value = var.cookie_domain
|
||||
}
|
||||
env {
|
||||
name = "COOKIE_SECURE"
|
||||
value = "true"
|
||||
}
|
||||
env {
|
||||
name = "COOKIE_SAME_SITE"
|
||||
value = "Lax"
|
||||
}
|
||||
# Built-in robots.txt that disallows known AI scrapers — well-behaved
|
||||
# bots get blocked here without ever paying the PoW cost.
|
||||
env {
|
||||
name = "SERVE_ROBOTS_TXT"
|
||||
value = "true"
|
||||
}
|
||||
# Drop cluster-internal IPs from XFF so Anubis sees the real client.
|
||||
env {
|
||||
name = "XFF_STRIP_PRIVATE"
|
||||
value = "true"
|
||||
}
|
||||
env {
|
||||
name = "SLOG_LEVEL"
|
||||
value = "INFO"
|
||||
}
|
||||
env {
|
||||
name = "ED25519_PRIVATE_KEY_HEX_FILE"
|
||||
# Mounted from the ESO-managed Secret below.
|
||||
value = "/keys/key"
|
||||
}
|
||||
env {
|
||||
name = "POLICY_FNAME"
|
||||
value = "/config/botPolicies.yaml"
|
||||
}
|
||||
|
||||
volume_mount {
|
||||
name = "ed25519-key"
|
||||
mount_path = "/keys"
|
||||
read_only = true
|
||||
}
|
||||
volume_mount {
|
||||
name = "policy"
|
||||
mount_path = "/config"
|
||||
read_only = true
|
||||
}
|
||||
|
||||
resources {
|
||||
requests = {
|
||||
cpu = var.cpu_request
|
||||
memory = var.memory
|
||||
}
|
||||
limits = {
|
||||
memory = var.memory
|
||||
}
|
||||
}
|
||||
|
||||
# Liveness + readiness on the metrics endpoint (zero auth, always 200).
|
||||
liveness_probe {
|
||||
http_get {
|
||||
path = "/metrics"
|
||||
port = "metrics"
|
||||
}
|
||||
initial_delay_seconds = 10
|
||||
period_seconds = 30
|
||||
failure_threshold = 3
|
||||
}
|
||||
readiness_probe {
|
||||
http_get {
|
||||
path = "/metrics"
|
||||
port = "metrics"
|
||||
}
|
||||
initial_delay_seconds = 2
|
||||
period_seconds = 5
|
||||
failure_threshold = 2
|
||||
}
|
||||
|
||||
security_context {
|
||||
run_as_non_root = true
|
||||
run_as_user = 1000
|
||||
run_as_group = 1000
|
||||
allow_privilege_escalation = false
|
||||
read_only_root_filesystem = true
|
||||
capabilities {
|
||||
drop = ["ALL"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
volume {
|
||||
name = "ed25519-key"
|
||||
secret {
|
||||
secret_name = "${local.full_name}-key"
|
||||
items {
|
||||
key = "key"
|
||||
path = "key"
|
||||
}
|
||||
}
|
||||
}
|
||||
volume {
|
||||
name = "policy"
|
||||
config_map {
|
||||
name = kubernetes_config_map.policy.metadata[0].name
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
lifecycle {
|
||||
# KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
|
||||
ignore_changes = [spec[0].template[0].spec[0].dns_config]
|
||||
}
|
||||
|
||||
depends_on = [kubernetes_manifest.ed25519_secret]
|
||||
}
|
||||
|
||||
resource "kubernetes_service" "anubis" {
|
||||
metadata {
|
||||
name = local.full_name
|
||||
namespace = var.namespace
|
||||
labels = local.labels
|
||||
annotations = {
|
||||
"prometheus.io/scrape" = "true"
|
||||
"prometheus.io/path" = "/metrics"
|
||||
"prometheus.io/port" = "9090"
|
||||
}
|
||||
}
|
||||
|
||||
spec {
|
||||
selector = { app = local.full_name }
|
||||
port {
|
||||
name = "http"
|
||||
port = 8080
|
||||
target_port = 8923
|
||||
protocol = "TCP"
|
||||
}
|
||||
port {
|
||||
name = "metrics"
|
||||
port = 9090
|
||||
target_port = 9090
|
||||
protocol = "TCP"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# PodDisruptionBudget for the Anubis Deployment. Selects the same pods as the
# Deployment/Service (`app = anubis-<name>`) and carries the module's standard
# label set so it can be found with the same label queries as the other
# resources created by this module.
resource "kubernetes_pod_disruption_budget_v1" "anubis" {
  metadata {
    name      = local.full_name
    namespace = var.namespace
    labels    = local.labels
  }
  spec {
    # max_unavailable=1 means: at most one pod can be voluntarily disrupted
    # at a time. With replicas=2 this allows clean rolling drains (one pod
    # goes down → other serves traffic → first recreates elsewhere). With
    # replicas=1 (no shared store) this is functionally equivalent to no
    # PDB — drain proceeds, brief outage, new pod schedules elsewhere.
    # Was min_available=1 before 2026-05-16 which deadlocked drains on
    # single-replica instances (eviction API can never satisfy the
    # constraint at replicas=1). See PM-2026-05-11.
    max_unavailable = "1"
    selector {
      match_labels = { app = local.full_name }
    }
  }
}
|
||||
|
||||
output "service_name" {
|
||||
value = kubernetes_service.anubis.metadata[0].name
|
||||
description = "ClusterIP service name. Pass this to ingress_factory's `service_name` so Traefik routes through Anubis."
|
||||
}
|
||||
|
||||
# Port exposed by the Anubis Service. Read back from the Service resource
# (first `port` block, named "http") instead of repeating the literal, so the
# output cannot drift if the Service port is ever changed.
output "service_port" {
  value       = kubernetes_service.anubis.spec[0].port[0].port
  description = "Service port. Anubis listens on 8923 inside; the Service exposes 8080."
}
|
||||
431
modules/kubernetes/ingress_factory/main.tf
Normal file
431
modules/kubernetes/ingress_factory/main.tf
Normal file
|
|
@ -0,0 +1,431 @@
|
|||
terraform {
|
||||
required_providers {
|
||||
cloudflare = {
|
||||
source = "cloudflare/cloudflare"
|
||||
version = "~> 4"
|
||||
}
|
||||
kubernetes = {
|
||||
source = "hashicorp/kubernetes"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
variable "name" { type = string }
|
||||
variable "service_name" {
|
||||
type = string
|
||||
default = null # defaults to name
|
||||
}
|
||||
variable "host" {
|
||||
type = string
|
||||
default = null
|
||||
}
|
||||
variable "namespace" { type = string }
|
||||
variable "external_name" {
|
||||
type = string
|
||||
default = null
|
||||
}
|
||||
variable "port" {
|
||||
default = "80"
|
||||
}
|
||||
variable "tls_secret_name" {}
|
||||
variable "backend_protocol" {
|
||||
default = "HTTP"
|
||||
}
|
||||
variable "auth" {
|
||||
type = string
|
||||
default = "required"
|
||||
description = <<-EOT
|
||||
Auth posture for this ingress. Pick by asking "what gates the app?":
|
||||
|
||||
* "required" (default, fail-closed): Authentik forward-auth gates every
|
||||
request. Pick this when the backend has NO built-in user auth and
|
||||
Authentik is the only thing standing between strangers and the app.
|
||||
Examples: prowlarr, qbittorrent, netbox, phpipam, k8s-dashboard, any
|
||||
admin UI shipped without its own login.
|
||||
|
||||
* "app": the backend handles its own user authentication (NextAuth,
|
||||
Django sessions, OAuth, bearer-token API, etc.) and Authentik would
|
||||
only get in the way. No Authentik middleware is attached; the app's
|
||||
own login is the gate. Examples: immich, linkwarden, tandoor,
|
||||
freshrss, affine, actualbudget, audiobookshelf, novelapp.
|
||||
**Functionally identical to "none"** — the distinct name exists to
|
||||
record intent at the call site so future readers don't have to guess.
|
||||
|
||||
* "public": Authentik anonymous binding via the `public` outpost.
|
||||
Strangers are auto-bound to the `guest` Authentik user; logged-in
|
||||
users keep their identity in X-authentik-username. Only works for
|
||||
top-level browser navigation — CORS preflight rejects XHR/fetch and
|
||||
automation can't replay the cookie dance. Audit trail, not a gate.
|
||||
|
||||
* "none": no Authentik middleware, no own-auth claim — explicitly
|
||||
public or unauthenticated-by-design. Use for: Anubis-fronted content
|
||||
sites (where Anubis is the gate), native-client APIs that auth
|
||||
themselves (Git, /v2/, WebDAV/CalDAV, CardDAV), webhook receivers,
|
||||
OAuth callbacks, and Authentik outposts themselves.
|
||||
|
||||
**Anti-exposure rule** (the reason "app" exists as a distinct mode):
|
||||
only pick "app" or "none" AFTER you have verified the app has its own
|
||||
user auth (for "app") OR the endpoint is intentionally public (for
|
||||
"none"). Picking either of these on a naked admin UI exposes it to the
|
||||
internet. The default is "required" specifically so accidental omission
|
||||
fails closed.
|
||||
|
||||
**Convention**: when using "app" or "none", add a comment line above
|
||||
the `auth = "..."` line stating what gates the app or why it's public.
|
||||
Future-you reads the call site, not the module description.
|
||||
EOT
|
||||
validation {
|
||||
condition = contains(["required", "app", "public", "none"], var.auth)
|
||||
error_message = "auth must be one of: required, app, public, none."
|
||||
}
|
||||
}
|
||||
variable "ingress_path" {
|
||||
type = list(string)
|
||||
default = ["/"]
|
||||
}
|
||||
# Optional request-body cap. The value is parsed in `locals` with the pattern
# ^([0-9]+)([kmgKMG]?)$ and converted to bytes for Traefik's Buffering
# middleware; validating here surfaces a readable error at the call site
# instead of a regex() failure deep inside the module.
variable "max_body_size" {
  type        = string
  default     = null
  description = "Maximum request body size, e.g. '5g'. null = no limit (Traefik default). When set, a per-ingress Buffering middleware is created and attached."

  validation {
    condition     = var.max_body_size == null || can(regex("^[0-9]+[kmgKMG]?$", var.max_body_size))
    error_message = "max_body_size must be a whole number optionally followed by k, m or g (e.g. \"1024\", \"50m\", \"5g\"), or null for no limit."
  }
}
|
||||
variable "extra_annotations" {
|
||||
default = {}
|
||||
}
|
||||
variable "ssl_redirect" {
|
||||
default = true
|
||||
type = bool
|
||||
}
|
||||
variable "allow_local_access_only" {
|
||||
default = false
|
||||
type = bool
|
||||
}
|
||||
variable "root_domain" {
|
||||
default = "viktorbarzin.me"
|
||||
type = string
|
||||
}
|
||||
variable "custom_content_security_policy" {
|
||||
type = string
|
||||
default = null
|
||||
}
|
||||
variable "exclude_crowdsec" {
|
||||
type = bool
|
||||
default = false
|
||||
}
|
||||
variable "full_host" {
|
||||
type = string
|
||||
default = null
|
||||
}
|
||||
variable "extra_middlewares" {
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
variable "skip_default_rate_limit" {
|
||||
type = bool
|
||||
default = false
|
||||
}
|
||||
variable "anti_ai_scraping" {
|
||||
type = bool
|
||||
default = null # null = auto (enabled when not protected, disabled when protected)
|
||||
}
|
||||
|
||||
variable "dns_type" {
|
||||
type = string
|
||||
default = "none"
|
||||
description = "Cloudflare DNS: 'proxied' (CNAME to tunnel), 'non-proxied' (A/AAAA to public IP), or 'none'"
|
||||
validation {
|
||||
condition = contains(["proxied", "non-proxied", "none"], var.dns_type)
|
||||
error_message = "dns_type must be 'proxied', 'non-proxied', or 'none'."
|
||||
}
|
||||
}
|
||||
|
||||
# Uptime Kuma external monitor: when true, annotate the ingress so the
# external-monitor-sync CronJob creates a `[External] <name>` monitor pointing
# at https://<host>. Null means "follow dns_type" — enabled whenever dns_type
# is not "none" (both 'proxied' and 'non-proxied'). An explicit false emits an
# opt-out annotation; null with dns_type = "none" emits no annotation at all.
variable "external_monitor" {
  type        = bool
  default     = null
  description = "Enable Uptime Kuma external monitor. null = auto (enabled when dns_type != 'none', i.e. 'proxied' or 'non-proxied')."
}
|
||||
|
||||
variable "external_monitor_name" {
|
||||
type = string
|
||||
default = null
|
||||
description = "Override the monitor label. Defaults to the ingress hostname label (e.g. 'dawarich' for dawarich.viktorbarzin.me)."
|
||||
}
|
||||
|
||||
# Cloudflare config defaults — override via variables if these change.
|
||||
# Source of truth: config.tfvars (cloudflare_zone_id, cloudflare_tunnel_id, public_ip, public_ipv6)
|
||||
variable "cloudflare_zone_id" {
|
||||
type = string
|
||||
default = "fd2c5dd4efe8fe38958944e74d0ced6d"
|
||||
}
|
||||
|
||||
variable "cloudflare_tunnel_id" {
|
||||
type = string
|
||||
default = "75182cd7-bb91-4310-b961-5d8967da8b41"
|
||||
}
|
||||
|
||||
variable "public_ip" {
|
||||
type = string
|
||||
default = "176.12.22.76"
|
||||
}
|
||||
|
||||
variable "public_ipv6" {
|
||||
type = string
|
||||
default = "2001:470:6e:43d::2"
|
||||
}
|
||||
|
||||
variable "homepage_group" {
|
||||
type = string
|
||||
default = null # auto-detect from namespace
|
||||
}
|
||||
|
||||
variable "homepage_enabled" {
|
||||
type = bool
|
||||
default = true
|
||||
}
|
||||
|
||||
# Derived values shared by the resources in this module: the effective
# hostname, middleware selection, monitoring/homepage annotations, the
# Cloudflare record name, and the parsed request-body limit.
locals {
  # full_host wins outright; otherwise "<host or name>.<root_domain>".
  effective_host = var.full_host != null ? var.full_host : "${var.host != null ? var.host : var.name}.${var.root_domain}"
  # Anti-AI default: ON when no Authentik auth fronts the ingress (auth =
  # "none" or auth = "app" — either the app gates users itself or the site
  # is intentionally public). When Authentik gates the request
  # (required/public), the auth flow already discourages bots.
  effective_anti_ai = var.anti_ai_scraping != null ? var.anti_ai_scraping : (var.auth == "none" || var.auth == "app")

  # Auth middleware selection. "app" and "none" both attach no Authentik
  # middleware — "app" signals "the backend has its own user auth", "none"
  # signals "intentionally public / native-client API / webhook". The
  # distinction lives at the call site for human readers; the runtime
  # effect is identical.
  auth_middleware = (
    var.auth == "required" ? "traefik-authentik-forward-auth@kubernetescrd" :
    var.auth == "public" ? "traefik-authentik-forward-auth-public@kubernetescrd" :
    null
  )

  # External monitor enabled by default when the ingress has a public DNS
  # record (either CF-proxied or direct A/AAAA). Explicit bool overrides.
  effective_external_monitor = var.external_monitor != null ? var.external_monitor : (var.dns_type != "none")

  # Emit the annotation when effective is true (positive signal), or when the
  # caller explicitly set external_monitor=false (opt-out). When the caller
  # leaves it null AND dns_type="none", emit nothing — the sync script's
  # default opt-in (any *.viktorbarzin.me ingress) keeps monitoring services
  # that are publicly reachable via routes we don't manage here (e.g.
  # helm-provisioned ingresses, services behind cloudflared tunnel with DNS
  # set elsewhere).
  external_monitor_annotations = local.effective_external_monitor ? merge(
    { "uptime.viktorbarzin.me/external-monitor" = "true" },
    var.external_monitor_name != null ? { "uptime.viktorbarzin.me/external-monitor-name" = var.external_monitor_name } : {},
    ) : (var.external_monitor == false ?
    { "uptime.viktorbarzin.me/external-monitor" = "false" } : {}
  )

  # Namespace → homepage group. Namespaces not listed fall back to "Other".
  ns_to_group = {
    monitoring      = "Infrastructure"
    prometheus      = "Infrastructure"
    technitium      = "Infrastructure"
    traefik         = "Infrastructure"
    metallb-system  = "Infrastructure"
    kyverno         = "Infrastructure"
    authentik       = "Identity & Security"
    crowdsec        = "Identity & Security"
    woodpecker      = "Development & CI"
    forgejo         = "Development & CI"
    immich          = "Media & Entertainment"
    frigate         = "Smart Home"
    home-assistant  = "Smart Home"
    ollama          = "AI & Data"
    dbaas           = "Infrastructure"
    servarr         = "Media & Entertainment"
    navidrome       = "Media & Entertainment"
    nextcloud       = "Productivity"
    n8n             = "Automation"
    changedetection = "Automation"
    finance         = "Finance & Personal"
    homepage        = "Core Platform"
    reverse-proxy   = "Smart Home"
    mailserver      = "Infrastructure"
  }

  # Caller override first, then the namespace lookup above.
  homepage_group = coalesce(
    var.homepage_group,
    lookup(local.ns_to_group, var.namespace, "Other")
  )

  # Cloudflare record name relative to the zone: "@" for the apex, otherwise
  # the host with the trailing ".<root_domain>" removed. trimsuffix (rather
  # than replace) strips only the suffix, so a host that happens to contain
  # ".<root_domain>" in the middle is not mangled. Hosts outside root_domain
  # are passed through unchanged, as before.
  dns_name = local.effective_host == var.root_domain ? "@" : trimsuffix(local.effective_host, ".${var.root_domain}")

  # gethomepage.dev discovery annotations; empty map when homepage_enabled is
  # false so nothing is merged into the ingress annotations.
  homepage_defaults = var.homepage_enabled ? {
    "gethomepage.dev/enabled" = "true"
    "gethomepage.dev/name"    = replace(replace(var.name, "-", " "), "_", " ")
    "gethomepage.dev/group"   = local.homepage_group
    "gethomepage.dev/href"    = "https://${local.effective_host}"
    "gethomepage.dev/icon"    = "${replace(var.name, "-", "")}.png"
  } : {}

  # Parse "5g"/"50m"/"1024k"/"42" into bytes. Traefik's Buffering middleware
  # takes maxRequestBodyBytes as an integer. Empty unit = bytes.
  # body_size_match is [digits, unit] (the two capture groups) or null.
  body_size_match = var.max_body_size == null ? null : regex("^([0-9]+)([kmgKMG]?)$", var.max_body_size)
  body_size_unit_multiplier = var.max_body_size == null ? 0 : (
    lower(local.body_size_match[1]) == "g" ? 1073741824 :
    lower(local.body_size_match[1]) == "m" ? 1048576 :
    lower(local.body_size_match[1]) == "k" ? 1024 :
    1
  )
  max_body_size_bytes = var.max_body_size == null ? 0 : tonumber(local.body_size_match[0]) * local.body_size_unit_multiplier
}
|
||||
|
||||
|
||||
# Optional ExternalName Service, created only when `external_name` is set.
# It gives the ingress an in-cluster Service (named var.name) to target, which
# is an alias for the external DNS name supplied by the caller.
resource "kubernetes_service" "proxied-service" {
  count = var.external_name == null ? 0 : 1
  metadata {
    name      = var.name
    namespace = var.namespace
    labels = {
      "app" = var.name
    }
  }

  spec {
    # count guarantees external_name is non-null here, so the type is always
    # ExternalName (the previous ClusterIP branch was unreachable).
    type = "ExternalName"
    # Point at the caller-supplied target. Previously this was var.name, which
    # made the Service an alias for its own short name and ignored the value
    # of var.external_name entirely.
    external_name = var.external_name

    port {
      name        = "${var.name}-web"
      port        = var.port
      protocol    = "TCP"
      target_port = var.port
    }
  }
}
|
||||
|
||||
resource "kubernetes_ingress_v1" "proxied-ingress" {
|
||||
metadata {
|
||||
name = var.name
|
||||
namespace = var.namespace
|
||||
annotations = merge({
|
||||
"traefik.ingress.kubernetes.io/router.middlewares" = join(",", compact(concat([
|
||||
"traefik-retry@kubernetescrd",
|
||||
"traefik-error-pages@kubernetescrd",
|
||||
var.skip_default_rate_limit ? null : "traefik-rate-limit@kubernetescrd",
|
||||
var.custom_content_security_policy == null ? "traefik-csp-headers@kubernetescrd" : null,
|
||||
var.exclude_crowdsec ? null : "traefik-crowdsec@kubernetescrd",
|
||||
local.effective_anti_ai ? "traefik-ai-bot-block@kubernetescrd" : null,
|
||||
local.effective_anti_ai ? "traefik-anti-ai-headers@kubernetescrd" : null,
|
||||
local.auth_middleware,
|
||||
var.allow_local_access_only ? "traefik-local-only@kubernetescrd" : null,
|
||||
var.custom_content_security_policy != null ? "${var.namespace}-custom-csp-${var.name}@kubernetescrd" : null,
|
||||
var.max_body_size != null ? "${var.namespace}-buffering-${var.name}@kubernetescrd" : null,
|
||||
], var.extra_middlewares)))
|
||||
"traefik.ingress.kubernetes.io/router.entrypoints" = "websecure"
|
||||
}, local.homepage_defaults, var.extra_annotations,
|
||||
var.dns_type != "none" ? { "cloudflare.viktorbarzin.me/dns-type" = var.dns_type } : {},
|
||||
local.external_monitor_annotations,
|
||||
)
|
||||
}
|
||||
|
||||
spec {
|
||||
ingress_class_name = "traefik"
|
||||
tls {
|
||||
hosts = [local.effective_host]
|
||||
secret_name = var.tls_secret_name
|
||||
}
|
||||
rule {
|
||||
host = local.effective_host
|
||||
http {
|
||||
dynamic "path" {
|
||||
for_each = var.ingress_path
|
||||
|
||||
content {
|
||||
path = path.value
|
||||
backend {
|
||||
service {
|
||||
|
||||
name = var.service_name != null ? var.service_name : var.name
|
||||
port {
|
||||
number = var.port
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Custom CSP headers middleware - created per service when custom_content_security_policy is set
|
||||
resource "kubernetes_manifest" "custom_csp" {
|
||||
count = var.custom_content_security_policy != null ? 1 : 0
|
||||
|
||||
manifest = {
|
||||
apiVersion = "traefik.io/v1alpha1"
|
||||
kind = "Middleware"
|
||||
metadata = {
|
||||
name = "custom-csp-${var.name}"
|
||||
namespace = var.namespace
|
||||
}
|
||||
spec = {
|
||||
headers = {
|
||||
contentSecurityPolicy = var.custom_content_security_policy
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Buffering middleware - created per service when max_body_size is set.
|
||||
# Traefik default is unlimited; setting maxRequestBodyBytes enforces a limit
|
||||
# (e.g. Forgejo container pushes can ship multi-GB layer blobs).
|
||||
resource "kubernetes_manifest" "buffering" {
|
||||
count = var.max_body_size != null ? 1 : 0
|
||||
|
||||
manifest = {
|
||||
apiVersion = "traefik.io/v1alpha1"
|
||||
kind = "Middleware"
|
||||
metadata = {
|
||||
name = "buffering-${var.name}"
|
||||
namespace = var.namespace
|
||||
}
|
||||
spec = {
|
||||
buffering = {
|
||||
maxRequestBodyBytes = local.max_body_size_bytes
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Cloudflare DNS records are managed automatically whenever dns_type is set:
#   "proxied"     -> one CNAME pointing at the Cloudflare tunnel
#   "non-proxied" -> A + AAAA records pointing at the public IPs
resource "cloudflare_record" "proxied" {
  count = var.dns_type == "proxied" ? 1 : 0

  zone_id = var.cloudflare_zone_id
  type    = "CNAME"
  name    = local.dns_name
  content = "${var.cloudflare_tunnel_id}.cfargotunnel.com"

  proxied = true
  ttl     = 1 # Cloudflare's "automatic" TTL

  # Take over a record that already exists instead of failing on conflict.
  allow_overwrite = true
}
|
||||
|
||||
# IPv4 record for services exposed directly (dns_type = "non-proxied").
resource "cloudflare_record" "non_proxied_a" {
  count = var.dns_type == "non-proxied" ? 1 : 0

  zone_id = var.cloudflare_zone_id
  type    = "A"
  name    = local.dns_name
  content = var.public_ip

  proxied = false
  ttl     = 1 # Cloudflare's "automatic" TTL

  # Take over a record that already exists instead of failing on conflict.
  allow_overwrite = true
}
|
||||
|
||||
# IPv6 record for services exposed directly (dns_type = "non-proxied").
resource "cloudflare_record" "non_proxied_aaaa" {
  count = var.dns_type == "non-proxied" ? 1 : 0

  zone_id = var.cloudflare_zone_id
  type    = "AAAA"
  name    = local.dns_name
  content = var.public_ipv6

  proxied = false
  ttl     = 1 # Cloudflare's "automatic" TTL

  # Take over a record that already exists instead of failing on conflict.
  allow_overwrite = true
}
|
||||
88
modules/kubernetes/nfs_volume/main.tf
Normal file
88
modules/kubernetes/nfs_volume/main.tf
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
# ---------------------------------------------------------------------------
# Inputs
# ---------------------------------------------------------------------------

# Shared by the PV and the PVC, and also used as the CSI volume handle.
variable "name" {
  type        = string
  description = "Unique name for PV and PVC (convention: <service>-<purpose>)"
}

variable "namespace" {
  type        = string
  description = "Kubernetes namespace for the PVC"
}

variable "nfs_server" {
  type        = string
  description = "NFS server address"
}

variable "nfs_path" {
  type        = string
  description = "NFS export path (e.g. /mnt/main/myservice)"
}

# Applied to both the PV capacity and the PVC request.
variable "storage" {
  type        = string
  default     = "10Gi"
  description = "Storage capacity (informational for NFS)"
}

# Applied identically to the PV and the PVC.
variable "access_modes" {
  type        = list(string)
  default     = ["ReadWriteMany"]
  description = "PV/PVC access modes"
}
|
||||
|
||||
# Statically defined PersistentVolume that exposes one NFS export through the
# nfs.csi.k8s.io CSI driver. The PVC in this module binds to it by name.
resource "kubernetes_persistent_volume" "this" {
  metadata {
    name = var.name
  }

  spec {
    storage_class_name               = "nfs-truenas"
    volume_mode                      = "Filesystem"
    persistent_volume_reclaim_policy = "Retain"
    access_modes                     = var.access_modes

    # Per the variable's own description, capacity is informational for NFS.
    capacity = {
      storage = var.storage
    }

    # NFSv4 soft mount; option order is kept exactly as before.
    mount_options = [
      "nfsvers=4",
      "soft",
      "timeo=30",
      "retrans=3",
      "actimeo=5",
    ]

    persistent_volume_source {
      csi {
        driver        = "nfs.csi.k8s.io"
        volume_handle = var.name

        volume_attributes = {
          share  = var.nfs_path
          server = var.nfs_server
        }
      }
    }
  }
}
|
||||
|
||||
# Claim pinned to the PersistentVolume above via volume_name, using the same
# storage class, access modes and size as the volume itself.
resource "kubernetes_persistent_volume_claim" "this" {
  metadata {
    namespace = var.namespace
    name      = var.name
  }

  spec {
    # Referencing the PV resource (rather than var.name) also makes Terraform
    # create the volume before the claim.
    volume_name        = kubernetes_persistent_volume.this.metadata[0].name
    storage_class_name = "nfs-truenas"
    access_modes       = var.access_modes

    resources {
      requests = {
        storage = var.storage
      }
    }
  }
}
|
||||
|
||||
# Read from the created PVC so that consumers depend on the claim existing.
output "claim_name" {
  value       = kubernetes_persistent_volume_claim.this.metadata[0].name
  description = "PVC name to use in pod spec persistent_volume_claim blocks"
}
|
||||
25
modules/kubernetes/setup_tls_secret/main.tf
Normal file
25
modules/kubernetes/setup_tls_secret/main.tf
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
# ---------------------------------------------------------------------------
# Inputs
# ---------------------------------------------------------------------------

variable "namespace" {
  type        = string
  description = "Namespace in which the TLS Secret is created."
}

variable "tls_secret_name" {
  type        = string
  description = "Name of the kubernetes.io/tls Secret to create."
}

variable "tls_crt" {
  type        = string
  default     = ""
  description = "PEM certificate chain. When left empty, the Secret falls back to secrets/fullchain.pem under path.root."
}

# Marked sensitive so the private key is never rendered in plan/apply output
# if it is ever referenced outside the Secret's data attribute.
variable "tls_key" {
  type        = string
  default     = ""
  sensitive   = true
  description = "PEM private key. When left empty, the Secret falls back to secrets/privkey.pem under path.root."
}
|
||||
|
||||
# kubernetes.io/tls Secret holding the certificate chain and private key,
# taken from the module inputs or, when those are empty, from PEM files under
# <root>/secrets.
resource "kubernetes_secret" "tls_secret" {
  type = "kubernetes.io/tls"

  metadata {
    namespace = var.namespace
    name      = var.tls_secret_name
  }

  data = {
    # Variable defaults cannot call functions, so the on-disk fallback is
    # resolved here instead: an empty input means "read the file".
    "tls.key" = var.tls_key != "" ? var.tls_key : file("${path.root}/secrets/privkey.pem")
    "tls.crt" = var.tls_crt != "" ? var.tls_crt : file("${path.root}/secrets/fullchain.pem")
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: the sync-tls-secret policy stamps generate.kyverno.io/* + app.kubernetes.io/managed-by labels on this generated Secret
    ignore_changes = [metadata[0].labels]
  }
}
|
||||
89
modules/kubernetes/setup_tls_secret/renew2.sh
Executable file
89
modules/kubernetes/setup_tls_secret/renew2.sh
Executable file
|
|
@ -0,0 +1,89 @@
|
|||
#!/usr/bin/env sh
#
# Renew the viktorbarzin.me (+ wildcard) certificate with certbot using a
# manual DNS-01 challenge answered through the Cloudflare API, then copy the
# resulting PEM files into ./secrets.
#
# Required environment:
#   CLOUDFLARE_ZONE_ID  zone that receives the _acme-challenge TXT records
#   CLOUDFLARE_TOKEN    API token used for the DNS record calls
# Optional environment:
#   TECHNITIUM_API_KEY  only referenced by the commented-out Technitium path
#
# Requires: certbot, curl, jq.

set -e

# Fail early with a clear message instead of calling the API with empty values.
: "${CLOUDFLARE_ZONE_ID:?CLOUDFLARE_ZONE_ID must be set}"
: "${CLOUDFLARE_TOKEN:?CLOUDFLARE_TOKEN must be set}"

export le_dir="/tmp/le"
export config_dir="$le_dir/out/config"
export technitium_token="$TECHNITIUM_API_KEY"
export certbot_auth="$le_dir/certbot_auth.sh"
export certbot_cleanup="$le_dir/certbot_cleanup.sh"

# -p: a plain mkdir aborts every re-run under `set -e` once the dir exists.
mkdir -p "$le_dir"
# The generated hook scripts embed API tokens; keep the directory private.
chmod 700 "$le_dir"

# NOTE: both heredocs below are intentionally unquoted. $CLOUDFLARE_* are
# expanded now (baked into the hook), while \$CERTBOT_* / \$record_id are
# escaped so they are expanded later, when certbot runs the hook.
echo "Creating $certbot_auth"
cat << EOF > "$certbot_auth"
#!/usr/bin/env sh
# Generate API token from DNS web console

# Create challenge TXT record

# TECHNITIUM
#API_TOKEN="$technitium_token"
# curl "http://technitium-web.technitium.svc.cluster.local:5380/api/zones/records/add?token=\$API_TOKEN&domain=_acme-challenge.\$CERTBOT_DOMAIN&type=TXT&ttl=60&text=\$CERTBOT_VALIDATION"

# CLOUDFLARE
curl https://api.cloudflare.com/client/v4/zones/$CLOUDFLARE_ZONE_ID/dns_records \
-H 'Content-Type: application/json' \
-H "Authorization: Bearer $CLOUDFLARE_TOKEN" \
-d "{
\"comment\": \"certbot temporary challenge\",
\"content\": \"\$CERTBOT_VALIDATION\",
\"name\": \"_acme-challenge.\$CERTBOT_DOMAIN\",
\"proxied\": false,
\"ttl\": 60,
\"type\": \"TXT\"
}"


# Sleep to make sure the change has time to propagate from primary to secondary name servers
sleep 25
EOF

chmod 700 "$certbot_auth"
# The hook is deliberately NOT printed: it contains the API tokens, which
# would otherwise end up in the job log.


echo "Creating $certbot_cleanup"
cat << EOF > "$certbot_cleanup"
#!/usr/bin/env sh
# Generate API token from DNS web console

# Delete challenge TXT record

# TECHNITIUM
#API_TOKEN="$technitium_token"
#curl "http://technitium-web.technitium.svc.cluster.local:5380/api/zones/records/delete?token=\$API_TOKEN&domain=_acme-challenge.\$CERTBOT_DOMAIN&type=TXT&text=\$CERTBOT_VALIDATION"

# CLOUDFLARE
# Ask the API for exactly the challenge TXT records instead of listing the
# whole zone and matching on "acme": the unfiltered listing can match
# unrelated records of any type.
curl "https://api.cloudflare.com/client/v4/zones/$CLOUDFLARE_ZONE_ID/dns_records?type=TXT&name=_acme-challenge.\$CERTBOT_DOMAIN" -H "Authorization: Bearer $CLOUDFLARE_TOKEN" | jq -r '.result[].id' | while read -r record_id; do
curl "https://api.cloudflare.com/client/v4/zones/$CLOUDFLARE_ZONE_ID/dns_records/\$record_id" \
-X DELETE \
-H "Authorization: Bearer $CLOUDFLARE_TOKEN"
done

EOF

chmod 700 "$certbot_cleanup"
# Not printed either, for the same reason as the auth hook.


# Leftovers from an earlier, interrupted run would make the TXT lookup return
# outdated validation values, so remove them before requesting a new challenge.
echo "Cleaning up stale _acme-challenge TXT records from Cloudflare"
curl -s "https://api.cloudflare.com/client/v4/zones/$CLOUDFLARE_ZONE_ID/dns_records?type=TXT&name=_acme-challenge.viktorbarzin.me" \
  -H "Authorization: Bearer $CLOUDFLARE_TOKEN" | jq -r '.result[].id' | while read -r old_id; do
  echo "Deleting stale record $old_id"
  curl -s -X DELETE "https://api.cloudflare.com/client/v4/zones/$CLOUDFLARE_ZONE_ID/dns_records/$old_id" \
    -H "Authorization: Bearer $CLOUDFLARE_TOKEN" > /dev/null
done

echo "Executing certbot renew command"
# The wildcard name is quoted so the shell cannot glob-expand it against files
# in the current directory.
certbot certonly \
  --manual \
  --preferred-challenges=dns \
  --email me@viktorbarzin.me \
  --server https://acme-v02.api.letsencrypt.org/directory \
  --agree-tos \
  --no-eff-email \
  --manual-auth-hook "$certbot_auth" \
  --manual-cleanup-hook "$certbot_cleanup" \
  --config-dir "$config_dir" \
  --work-dir "$le_dir/workdir" \
  --logs-dir "$le_dir/logsdir" \
  -d viktorbarzin.me \
  -d '*.viktorbarzin.me'

# Only the (public) certificate chain is echoed; the private key is never
# written to stdout.
cat "$config_dir/live/viktorbarzin.me/fullchain.pem"

# Without the directory, the first cp would create a plain file named
# "secrets" and the second would overwrite it with the private key.
mkdir -p ./secrets
cp --remove-destination "$config_dir/live/viktorbarzin.me/fullchain.pem" ./secrets/
cp --remove-destination "$config_dir/live/viktorbarzin.me/privkey.pem" ./secrets/


# printf: whether echo interprets "\n" differs between sh implementations.
printf 'Done renewing cert. Output certificates stored in ./secrets\n\n'
ls ./secrets
|
||||
Loading…
Add table
Add a link
Reference in a new issue