fix: restore tree dropped by 6d224861; land stem95su gdrive-sync (10m) [ci skip]

6d224861 came from a --no-checkout worktree whose empty index made the
commit drop every file except two. This restores 05b50d2b's full tree and
correctly adds stacks/stem95su/gdrive-sync.tf + the service-catalog stem95su
entry. Forward-only (parent=6d224861, no force-push); [ci skip] since the
live infra was never applied from the broken commit.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-06-09 08:45:33 +00:00
parent 6d224861c4
commit fd0f4a0365
1166 changed files with 358546 additions and 0 deletions

View file

@ -0,0 +1,91 @@
# =============================================================================
# Stack Template — Copy this directory to stacks/<your-app>/ and customize.
# Then submit a PR to the infra repo.
# =============================================================================
#
# Prerequisites:
# 1. You are a namespace-owner in k8s_users (Vault KV secret/platform)
# 2. Your namespace already exists (created by vault stack)
# 3. You have Vault CLI access: vault login -method=oidc
#
# Steps:
# 1. cp -r stacks/_template stacks/myapp
# 2. mv stacks/myapp/main.tf.example stacks/myapp/main.tf
# 3. Search-replace <placeholders> below
# 4. Store secrets: vault kv put secret/<your-username>/myapp KEY=value
# 5. git checkout -b feat/myapp && git add stacks/myapp && git commit -m "Add myapp stack" && git push -u origin feat/myapp
# 6. Open PR, get reviewed, merge
# 7. Admin runs: cd stacks/myapp && terragrunt apply
# =============================================================================
# Name of the TLS secret; passed to both the tls_secret and ingress modules.
variable "tls_secret_name" {
  sensitive = true
  type      = string
}
# NOTE: Your namespace is auto-created by the vault stack from k8s_users.
# Only add a kubernetes_namespace resource if you need a SEPARATE namespace
# for this specific app (not your user namespace).
# Calls the shared setup_tls_secret module with the target namespace and
# the TLS secret name.
module "tls_secret" {
  source = "../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = "<your-namespace>" # e.g., "anca"
}
# Single-replica Deployment for the app. Replace every <placeholder>.
resource "kubernetes_deployment" "app" {
  metadata {
    namespace = "<your-namespace>"
    name      = "<app-name>"
  }

  spec {
    replicas = 1

    # Must match the pod template labels below.
    selector {
      match_labels = {
        app = "<app-name>"
      }
    }

    template {
      metadata {
        labels = {
          app = "<app-name>"
        }
      }

      spec {
        container {
          image = "<dockerhub-user>/<app-name>:<tag>"
          name  = "<app-name>"

          resources {
            limits = {
              memory = "256Mi"
            }
            requests = {
              cpu    = "10m"
              memory = "256Mi"
            }
          }

          port {
            container_port = 8080 # Change to your app's port
          }
        }
      }
    }
  }

  lifecycle {
    # Drift on the pod dns_config is ignored by Terraform.
    ignore_changes = [spec[0].template[0].spec[0].dns_config] # KYVERNO_LIFECYCLE_V1
  }
}
# Service exposing the app's pods on port 80.
resource "kubernetes_service" "app" {
  metadata {
    namespace = "<your-namespace>"
    name      = "<app-name>"
  }

  spec {
    port {
      target_port = 8080 # Match container_port above
      port        = 80
    }

    selector = {
      app = "<app-name>"
    }
  }
}
# Ingress for the app, created through the shared ingress_factory module.
module "ingress" {
  source = "../../modules/kubernetes/ingress_factory"

  name            = "<app-name>"
  namespace       = "<your-namespace>"
  tls_secret_name = var.tls_secret_name

  # dns_type: "proxied" (Cloudflare CDN), "non-proxied" (direct A/AAAA), or "none"
  dns_type = "proxied"

  # auth: "required" (Authentik login), "public" (anonymous bound to guest), or "none" (no auth)
  # NOTE(review): stacks/actualbudget/factory passes auth = "app" to this same
  # module for an app that does its own login; confirm and list it here if supported.
  auth = "required"
}

View file

@ -0,0 +1,8 @@
# Pull in the root Terragrunt configuration found by walking up the parent folders.
include "root" {
  path = find_in_parent_folders()
}

# Depends on the platform stack; skip_outputs is set, so its outputs are not read.
dependency "platform" {
  skip_outputs = true
  config_path  = "../platform"
}

127
stacks/actualbudget/.terraform.lock.hcl generated Normal file
View file

@ -0,0 +1,127 @@
# This file is maintained automatically by "terraform init".
# Manual edits may be lost in future updates.
provider "registry.terraform.io/cloudflare/cloudflare" {
version = "4.52.7"
constraints = "~> 4.0"
hashes = [
"h1:pPItIWii5oymR+geZB219ROSPuSODPLTlM4S/u8xLvM=",
"zh:0c904ce31a4c6c4a5b3bf7ff1560e77c0cc7e2450c8553ded8e8c90398e1418b",
"zh:36183d310c36373fe4cb936b83c595c6fd3b0a94bc7827f28e5789ccbf59752e",
"zh:556a568a6f0235e8f41647de9e4d3a1e7b1d6502df8b19b54ec441f1c653ea10",
"zh:633ebbd5b0245e75e500ef9be4d9e62288f97e8da3baaa51323892a786d90285",
"zh:6acfe60cf52a65ba8f044f748548d2119e7f4fd7f8ebcb14698960d87c68f529",
"zh:890df766e9b839623b1f0437355032a3c006226a6c200cd911e15ee1a9014e9f",
"zh:904acc31ebb9d6ef68c792074b30532ee61bf515f19e0a3c75b46f126cca1f13",
"zh:a1d0a81246afc8750286d3f6fe7a8fbe6460dd2662407b28dbfbabb612e5fa9d",
"zh:a41a36fe253fc365fe2b7ffc749624688b2693b4634862fda161179ab100029f",
"zh:a7ef269e77ffa8715c8945a2c14322c7ff159ea44c15f62505f3cbb2cae3b32d",
"zh:b01aa3bed30610633b762df64332b26f8844a68c3960cebcb30f04918efc67fe",
"zh:b069cc2cd18cae10757df3ae030508eac8d55de7e49eda7a5e3e11f2f7fe6455",
"zh:b2d2c6313729ebb7465dceece374049e2d08bda34473901be9ff46a8836d42b2",
"zh:db0e114edaf4bc2f3d4769958807c83022bfbc619a00bdf4c4bd17faa4ab2d8b",
"zh:ecc0aa8b9044f664fd2aaf8fa992d976578f78478980555b4b8f6148e8d1a5fe",
]
}
provider "registry.terraform.io/goauthentik/authentik" {
version = "2024.12.1"
constraints = "~> 2024.10"
hashes = [
"h1:roBMd+gi+TGgikH/bMzEI8JfvJiMAQWt+8FmokCrQIs=",
"zh:090260dc7889ea822ec1d899344e1ee23eba5290461989c0796149c9511f2316",
"zh:13c2655ff824b0dc4b9bb832b5ca6d41dba97cb280330258c5fef4115e236209",
"zh:166a73c3a810c9c895d68a8ff968158f339f8a2c1c03e20ec9fc5ed99cc64e20",
"zh:203777eae1cdc711233315499643180604cff2324411b186b7cf07fdbe16f655",
"zh:3b2f18c9a8d28dac74dc6bbf168c946855ab9c68f053578d4630c50d5eaf30a0",
"zh:4822275985f6b74b6196c47112316a4252db22cf4ceaef7c9ab4c66d488abf2f",
"zh:53ea97562666c8a5a2f6d63d418a302a7f8ee4b7bb7da35dedaa89aa5708b7f0",
"zh:56b8a230901e3550c92a1d3f58ee9dafe9853f30fe4315af3ab28ae63262e15d",
"zh:6293ab7b1fd8206a0c853591f50186aca4a1eff117b2a773e10760a23a2c83e9",
"zh:9433970f79fb92d8aae3ee436db5630ab312c78b6dc9df9c1db3273a18f8aaa1",
"zh:95df406214f79b3b98222d7c7fe8fc319a3d90b7a9d53e1d5abbda5dfb8b9436",
"zh:a85880da0552a42c8f449390fbd7d8b03541d1a13e04bba9f1404fa658754260",
"zh:a95f6e9bd62c67e70eba1b1a14728856b9a6a28cd1e5e3be54a7718882c87e7f",
"zh:dd599b51c5beb34a4c6feece244fde07d2558d69929449ab1fd39a5ebe738781",
]
}
provider "registry.terraform.io/hashicorp/helm" {
version = "3.1.1"
hashes = [
"h1:47CqNwkxctJtL/N/JuEj+8QMg8mRNI/NWeKO5/ydfZU=",
"h1:5b2ojWKT0noujHiweCds37ZreRFRQLNaErdJLusJN88=",
"zh:1a6d5ce931708aec29d1f3d9e360c2a0c35ba5a54d03eeaff0ce3ca597cd0275",
"zh:3411919ba2a5941801e677f0fea08bdd0ae22ba3c9ce3309f55554699e06524a",
"zh:81b36138b8f2320dc7f877b50f9e38f4bc614affe68de885d322629dd0d16a29",
"zh:95a2a0a497a6082ee06f95b38bd0f0d6924a65722892a856cfd914c0d117f104",
"zh:9d3e78c2d1bb46508b972210ad706dd8c8b106f8b206ecf096cd211c54f46990",
"zh:a79139abf687387a6efdbbb04289a0a8e7eaca2bd91cdc0ce68ea4f3286c2c34",
"zh:aaa8784be125fbd50c48d84d6e171d3fb6ef84a221dbc5165c067ce05faab4c8",
"zh:afecd301f469975c9d8f350cc482fe656e082b6ab0f677d1a816c3c615837cc1",
"zh:c54c22b18d48ff9053d899d178d9ffef7d9d19785d9bf310a07d648b7aac075b",
"zh:db2eefd55aea48e73384a555c72bac3f7d428e24147bedb64e1a039398e5b903",
"zh:ee61666a233533fd2be971091cecc01650561f1585783c381b6f6e8a390198a4",
"zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c",
]
}
provider "registry.terraform.io/hashicorp/kubernetes" {
version = "3.1.0"
hashes = [
"h1:oodIAuFMikXNmEtil5MQgP4dfSctUBYQiGJfjbsF3NY=",
"zh:0215c5c60be62028c09a2f22458e89cda3ef5830a632299f1d401eb3538874b0",
"zh:09ebb9f442431e278a310a9423f32caf467cb4b3cad3fe59573ca71fa7b14e20",
"zh:0c4e5912f83bb35846ae0a9ae54fc320706ee61894cd21cc6b4181b1c5a2fa5c",
"zh:1678c982853ad461e65ccb5e79d585e13ed109dd47dab2a66d3a7a304faeef65",
"zh:1c050a5c15e330457a9c18caacf61a923c59d663e13f2962e4b32f04fef523a0",
"zh:2c55bcec83be58ec132c7cb0a1ac644758b800d794fdc636d53a0eada0358a3a",
"zh:a062bb0aa316c08d8460c66a5d68da71da40de5d3bc3b31abcf3a1a9a19650f1",
"zh:a26fdea0afaa9b247c73c0b42843ca51ba7db0ac2571f9d3d50dcabd20ca1b98",
"zh:c872c9385a78d502bf5823d61cd3bb0f9a0585030e025eb12585c83451beeaa1",
"zh:f180879af931182beee4c8c0d9dab62b81d86f17ddcbe3786ef4c7cec9163a4e",
"zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c",
"zh:f70f5789264069e0eef06f9b5d5fde955ef7206f7d446d1ce51a4c37a3f3e02f",
]
}
provider "registry.terraform.io/hashicorp/random" {
version = "3.8.1"
hashes = [
"h1:Eexl06+6J+s75uD46+WnZtpJZYRVUMB0AiuPBifK6Jc=",
"h1:u8AKlWVDTH5r9YLSeswoVEjiY72Rt4/ch7U+61ZDkiQ=",
"zh:08dd03b918c7b55713026037c5400c48af5b9f468f483463321bd18e17b907b4",
"zh:0eee654a5542dc1d41920bbf2419032d6f0d5625b03bd81339e5b33394a3e0ae",
"zh:229665ddf060aa0ed315597908483eee5b818a17d09b6417a0f52fd9405c4f57",
"zh:2469d2e48f28076254a2a3fc327f184914566d9e40c5780b8d96ebf7205f8bc0",
"zh:37d7eb334d9561f335e748280f5535a384a88675af9a9eac439d4cfd663bcb66",
"zh:741101426a2f2c52dee37122f0f4a2f2d6af6d852cb1db634480a86398fa3511",
"zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3",
"zh:a902473f08ef8df62cfe6116bd6c157070a93f66622384300de235a533e9d4a9",
"zh:b85c511a23e57a2147355932b3b6dce2a11e856b941165793a0c3d7578d94d05",
"zh:c5172226d18eaac95b1daac80172287b69d4ce32750c82ad77fa0768be4ea4b8",
"zh:dab4434dba34aad569b0bc243c2d3f3ff86dd7740def373f2a49816bd2ff819b",
"zh:f49fd62aa8c5525a5c17abd51e27ca5e213881d58882fd42fec4a545b53c9699",
]
}
provider "registry.terraform.io/hashicorp/vault" {
version = "4.8.0"
constraints = "~> 4.0"
hashes = [
"h1:GPfhH6dr1LY0foPBDYv9bEGifx7eSwYqFcEAOWOUxLk=",
"h1:aHqgWQhDBMeZO9iUKwJYMlh4q+xNMUlMIcjRbF4d02Y=",
"zh:269ab13433f67684012ae7e15876532b0312f5d0d2002a9cf9febb1279ce5ea6",
"zh:4babc95bf0c40eb85005db1dc2ca403c46be4a71dd3e409db3711a56f7a5ca0e",
"zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3",
"zh:86e27c1c625ecc24446a11eeffc3ac319b36c2b4e51251db8579256a0dbcf136",
"zh:a32f31da94824009e26b077374440b52098aecb93c92ff55dc3d31dd37c4ea25",
"zh:be0a18c6c0425518bab4fbffd82078b82036a88503b5d76064de551c9f646cbf",
"zh:be5a77fdfd36863ebeec79cd12b1d13322ffad6821d157a0b279789fa06b5937",
"zh:be8317d142a3caad74c7d936039ae27076a1b2b8312ef5208e2871a5f525977c",
"zh:c94a84895a3d9954b80e983eed4603330a5cdbbd8eef5b3c99278c2d1402ef3c",
"zh:de1fb712784dd8415f011ca5346a34f87fab6046c730557615247e511dbc7d98",
"zh:e3eafae7da550f86cae395d6660b2a0e93ec8d2b0e0e5ef982ec762e961fc952",
"zh:ff35fb1ab6add288f0f368981e56f780b50405accd1937131cba1137999c8d83",
]
}

View file

@ -0,0 +1,434 @@
# Name of the TLS secret handed to the ingress module.
variable "tls_secret_name" {
  type = string
}

# Instance name; used as a suffix in every resource name of this module
# (e.g. "actualbudget-<name>", "budget-<name>").
variable "name" {
  type = string
}

# Image tag for actualbudget/actual-server.
variable "tag" {
  type    = string
  default = "latest"
}
# Value applied as the `tier` label on this instance's Deployments.
variable "tier" {
  type = string
}

# Budget sync id used in the bank-sync CronJob's API calls.
variable "sync_id" {
  default = null # If not passed, we won't run banksync
  type    = string
}

# Budget password handed to the http-api Deployment and the bank-sync CronJob.
variable "budget_encryption_password" {
  sensitive = true
  default   = null # If not passed, we won't run banksync; only known after initial installation
  type      = string
}
# Plan-time toggles: these MUST be known at plan time. The secret values
# (budget_encryption_password, sync_id) are read from ESO-managed K8s Secrets
# and are unknown at plan time on first apply, so we cannot base `count` on
# them directly. Callers pass these booleans as hardcoded plan-time constants
# that reflect whether the corresponding credentials are expected to exist.
# Master switch for the instance; gates every resource except the PVC.
variable "enabled" {
  description = "Deploy this instance. When false, only the PVC is kept (data preservation); deployment, service, ingress, http-api, and cronjob are not created. Flip back to true to bring the instance back."
  default     = true
  type        = bool
}

# Gates the actual-http-api Deployment and Service.
variable "enable_http_api" {
  description = "Deploy the actual-http-api sidecar. Must be true for the cronjob to run."
  default     = false
  type        = bool
}

# Gates the bank-sync CronJob.
variable "enable_bank_sync" {
  description = "Deploy the daily bank-sync CronJob. Requires enable_http_api=true."
  default     = false
  type        = bool
}
# NOTE(review): nfs_server is a required input but is not referenced by any
# resource visible in this module; confirm whether it is still needed.
variable "nfs_server" {
  type = string
}

# Extra annotations forwarded to the ingress module (extra_annotations).
variable "homepage_annotations" {
  default = {}
  type    = map(string)
}

# Initial storage request for the data PVC.
variable "storage_size" {
  default = "1Gi"
  type    = string
}
# Data volume for one instance. It has no `count`, so it is kept even when
# var.enabled is false.
resource "kubernetes_persistent_volume_claim" "data_encrypted" {
  wait_until_bound = false

  metadata {
    namespace = "actualbudget"
    name      = "actualbudget-${var.name}-data-encrypted"

    # Autoresizer settings: threshold, growth step and upper bound.
    annotations = {
      "resize.topolvm.io/storage_limit" = "5Gi"
      "resize.topolvm.io/increase"      = "100%"
      "resize.topolvm.io/threshold"     = "10%"
    }
  }

  spec {
    storage_class_name = "proxmox-lvm-encrypted"
    access_modes       = ["ReadWriteOnce"]

    resources {
      requests = {
        storage = var.storage_size
      }
    }
  }

  lifecycle {
    # The autoresizer expands requests.storage up to storage_limit and
    # PVCs can't shrink. Without this, every TF apply tries to revert
    # to the spec value, K8s rejects the shrink, and the PVC ends up
    # in Terminating-but-in-use limbo.
    ignore_changes = [spec[0].resources[0].requests]
  }
}
# Actual Budget server Deployment for one instance (var.name).
# Created only while var.enabled is true.
resource "kubernetes_deployment" "actualbudget" {
  count = var.enabled ? 1 : 0

  metadata {
    namespace = "actualbudget"
    name      = "actualbudget-${var.name}"
    labels = {
      tier = var.tier
      app  = "actualbudget-${var.name}"
    }
  }

  spec {
    replicas = 1

    selector {
      match_labels = {
        app = "actualbudget-${var.name}"
      }
    }

    # Recreate strategy; the data PVC mounted below is ReadWriteOnce.
    strategy {
      type = "Recreate"
    }

    template {
      metadata {
        labels = {
          app = "actualbudget-${var.name}"
        }
        annotations = {
          "diun.include_tags" = "^${var.tag}$"
          "diun.enable"       = "false" # daily updates; pretty noisy
        }
      }

      spec {
        volume {
          name = "data"
          persistent_volume_claim {
            claim_name = kubernetes_persistent_volume_claim.data_encrypted.metadata[0].name
          }
        }

        container {
          name  = "actualbudget"
          image = "actualbudget/actual-server:${var.tag}"

          volume_mount {
            mount_path = "/data"
            name       = "data"
          }

          resources {
            limits = {
              memory = "400Mi"
            }
            requests = {
              memory = "320Mi"
              cpu    = "15m"
            }
          }

          port {
            container_port = 5006
          }
        }
      }
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
    ignore_changes = [
      spec[0].template[0].spec[0].dns_config, # KYVERNO_LIFECYCLE_V1
      metadata[0].annotations["keel.sh/policy"],
      metadata[0].annotations["keel.sh/trigger"],
      metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2
      metadata[0].annotations["keel.sh/match-tag"],
      spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE Keel manages tag updates
      metadata[0].annotations["kubernetes.io/change-cause"],
      metadata[0].annotations["deployment.kubernetes.io/revision"],
      spec[0].template[0].metadata[0].annotations["keel.sh/update-time"], # KEEL_LIFECYCLE_V1
    ]
  }
}
# Service "budget-<name>": port 80 forwards to the server container's port 5006.
resource "kubernetes_service" "actualbudget" {
  count = var.enabled ? 1 : 0

  metadata {
    namespace = "actualbudget"
    name      = "budget-${var.name}"
    labels = {
      app = "actualbudget-${var.name}"
    }
  }

  spec {
    port {
      target_port = 5006
      port        = 80
      name        = "http"
    }

    selector = {
      app = "actualbudget-${var.name}"
    }
  }
}
# Ingress for the "budget-<name>" service; created only while var.enabled is true.
module "ingress" {
  source = "../../../modules/kubernetes/ingress_factory"
  count  = var.enabled ? 1 : 0

  name              = "budget-${var.name}"
  namespace         = "actualbudget"
  tls_secret_name   = var.tls_secret_name
  extra_annotations = var.homepage_annotations
  dns_type          = "proxied"

  # auth = "app": Actual Budget enforces a server password + per-user login
  # on its own sync API. Authentik forward-auth was 302-ing the mobile/web
  # sync clients; Actual's own auth gates users.
  auth = "app"
}
# 32-character API key shared by the http-api Deployment (API_KEY env) and
# the bank-sync CronJob (x-api-key header).
resource "random_string" "api-key" {
  lower  = true
  length = 32
}
# actual-http-api Deployment for one instance. Created only when both
# var.enabled and var.enable_http_api are true.
resource "kubernetes_deployment" "actualbudget-http-api" {
  count = var.enabled && var.enable_http_api ? 1 : 0

  metadata {
    namespace = "actualbudget"
    name      = "actualbudget-http-api-${var.name}"
    labels = {
      tier = var.tier
      app  = "actualbudget-http-api-${var.name}"
    }
  }

  spec {
    replicas = 1

    selector {
      match_labels = {
        app = "actualbudget-http-api-${var.name}"
      }
    }

    strategy {
      type = "RollingUpdate"
    }

    template {
      metadata {
        labels = {
          app = "actualbudget-http-api-${var.name}"
        }
      }

      spec {
        container {
          name  = "actualbudget"
          image = "jhonderson/actual-http-api:latest"

          port {
            container_port = 5007
          }

          resources {
            limits = {
              memory = "768Mi"
            }
            requests = {
              memory = "768Mi"
              cpu    = "50m"
            }
          }

          # Points at this instance's "budget-<name>" Service inside the cluster.
          env {
            name  = "ACTUAL_SERVER_URL"
            value = "http://budget-${var.name}.actualbudget.svc.cluster.local"
          }
          env {
            name  = "ACTUAL_SERVER_PASSWORD"
            value = var.budget_encryption_password
          }
          # Same key the bank-sync CronJob sends as x-api-key.
          env {
            name  = "API_KEY"
            value = random_string.api-key.result
          }
        }
      }
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
    ignore_changes = [
      spec[0].template[0].spec[0].dns_config, # KYVERNO_LIFECYCLE_V1
      metadata[0].annotations["keel.sh/policy"],
      metadata[0].annotations["keel.sh/trigger"],
      metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2
      metadata[0].annotations["keel.sh/match-tag"],
      spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE Keel manages tag updates
      metadata[0].annotations["kubernetes.io/change-cause"],
      metadata[0].annotations["deployment.kubernetes.io/revision"],
      spec[0].template[0].metadata[0].annotations["keel.sh/update-time"], # KEEL_LIFECYCLE_V1
    ]
  }
}
# Service "budget-http-api-<name>": port 80 forwards to the http-api
# container's port 5007. Gated by the same count as the http-api Deployment.
resource "kubernetes_service" "actualbudget-http-api" {
  count = var.enabled && var.enable_http_api ? 1 : 0

  metadata {
    namespace = "actualbudget"
    name      = "budget-http-api-${var.name}"
    labels = {
      app = "actualbudget-http-api-${var.name}"
    }
  }

  spec {
    port {
      target_port = 5007
      port        = 80
      name        = "http"
    }

    selector = {
      app = "actualbudget-http-api-${var.name}"
    }
  }
}
# Daily CronJob that asks the instance's actual-http-api Service to run a bank
# sync for every open, on-budget account, then pushes per-account and overall
# result metrics to the Prometheus Pushgateway. Created only when both
# var.enabled and var.enable_bank_sync are true.
#
# USER_NAME, SYNC_ID, API_KEY and PW reach the script as container environment
# variables instead of being interpolated into single-quoted shell literals,
# so a value containing a single quote can no longer terminate the literal and
# break (or inject commands into) the script.
#
# NOTE(review): the in-script comment says a Pushgateway POST keeps prior
# values for label sets missing from the payload. The Pushgateway README
# describes POST as replacing all metrics with the same *name* within the
# group; confirm that BankSyncAccountStale still sees accounts that failed in
# a run where another account succeeded.
resource "kubernetes_cron_job_v1" "bank-sync" {
  count = var.enabled && var.enable_bank_sync ? 1 : 0

  metadata {
    name      = "bank-sync-${var.name}"
    namespace = "actualbudget"
  }

  spec {
    concurrency_policy            = "Replace"
    failed_jobs_history_limit     = 5
    schedule                      = "0 0 * * *" # Daily
    starting_deadline_seconds     = 60
    successful_jobs_history_limit = 10

    job_template {
      metadata {}
      spec {
        backoff_limit              = 1
        ttl_seconds_after_finished = 86400
        template {
          metadata {}
          spec {
            container {
              name  = "bank-sync"
              image = "alpine:3.20"

              # Script inputs; read by the shell as $USER_NAME, $SYNC_ID,
              # $API_KEY and $PW.
              env {
                name  = "USER_NAME"
                value = var.name
              }
              env {
                name  = "SYNC_ID"
                value = var.sync_id
              }
              env {
                name  = "API_KEY"
                value = random_string.api-key.result
              }
              env {
                name  = "PW"
                value = var.budget_encryption_password
              }

              command = ["/bin/sh", "-c", <<-EOT
                set -u
                apk add --no-cache curl jq >/dev/null 2>&1
                # USER_NAME, SYNC_ID, API_KEY and PW come from the container environment.
                PG="http://prometheus-prometheus-pushgateway.monitoring:9091/metrics/job/bank-sync-$USER_NAME"
                API="http://budget-http-api-$USER_NAME"
                START=$(date +%s)
                # Enumerate active accounts: open + on-budget.
                ACCOUNTS=$(curl -fsS "$API/v1/budgets/$SYNC_ID/accounts" \
                  -H "x-api-key: $API_KEY" \
                  -H "budget-encryption-password: $PW" \
                  | jq -c '.data[] | select(.closed == false and .offbudget == false) | {id, name}')
                if [ -z "$ACCOUNTS" ]; then
                  echo "ERROR: GET /accounts returned no eligible accounts; aborting"
                  exit 1
                fi
                : > /tmp/payload
                rm -f /tmp/any_success
                # Per-account sync. Each account has its own PSD2/GoCardless
                # quota (4 successful pulls per 24h), so we treat them
                # independently: one rate-limited account doesn't mark the
                # run as a failure.
                echo "$ACCOUNTS" | while IFS= read -r ACCT; do
                  [ -z "$ACCT" ] && continue
                  ID=$(echo "$ACCT" | jq -r '.id')
                  NAME=$(echo "$ACCT" | jq -r '.name')
                  LABEL=$(echo "$NAME" | sed -E 's/[^a-zA-Z0-9]+/_/g')
                  HTTP_CODE=$(curl -s -o /tmp/r.txt -w '%%{http_code}' \
                    -X POST "$API/v1/budgets/$SYNC_ID/accounts/$ID/banksync" \
                    -H 'accept: application/json' \
                    -H "x-api-key: $API_KEY" \
                    -H "budget-encryption-password: $PW") || HTTP_CODE=0
                  NOW=$(date +%s)
                  if [ "$HTTP_CODE" = "200" ]; then
                    echo "OK account=$NAME"
                    printf 'bank_sync_account_success{account="%s"} 1\n' "$LABEL" >> /tmp/payload
                    printf 'bank_sync_account_last_success_timestamp{account="%s"} %s\n' "$LABEL" "$NOW" >> /tmp/payload
                    : > /tmp/any_success
                  else
                    echo "FAIL account=$NAME http=$HTTP_CODE body=$(cat /tmp/r.txt)"
                    printf 'bank_sync_account_success{account="%s"} 0\n' "$LABEL" >> /tmp/payload
                  fi
                done
                END=$(date +%s)
                DUR=$((END - START))
                if [ -f /tmp/any_success ]; then
                  ANY=1
                else
                  ANY=0
                fi
                # Pushgateway POST preserves prior values for label sets not
                # in the payload, so per-account last_success_timestamp values
                # for accounts that failed this run keep their prior good
                # values; that's what BankSyncAccountStale alerts on.
                {
                  printf '# HELP bank_sync_account_success Per-account sync result (1=ok, 0=fail)\n'
                  printf '# TYPE bank_sync_account_success gauge\n'
                  printf '# HELP bank_sync_account_last_success_timestamp Per-account Unix timestamp of last successful sync\n'
                  printf '# TYPE bank_sync_account_last_success_timestamp gauge\n'
                  cat /tmp/payload
                  printf '# HELP bank_sync_success 1 if at least one account synced this run\n'
                  printf '# TYPE bank_sync_success gauge\n'
                  printf 'bank_sync_success %s\n' "$ANY"
                  printf '# HELP bank_sync_duration_seconds Total duration of the cron run\n'
                  printf '# TYPE bank_sync_duration_seconds gauge\n'
                  printf 'bank_sync_duration_seconds %s\n' "$DUR"
                  if [ "$ANY" = "1" ]; then
                    printf '# HELP bank_sync_last_success_timestamp Unix timestamp of the most recent successful sync of any account\n'
                    printf '# TYPE bank_sync_last_success_timestamp gauge\n'
                    printf 'bank_sync_last_success_timestamp %s\n' "$END"
                  fi
                } | curl -fsS --data-binary @- "$PG"
              EOT
              ]
            }
          }
        }
      }
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
    ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
  }
}
# State migration for the new `enabled` toggle (2026-05-13): adding
# count to these resources shifts their addresses to [0]. Without
# moved {}, Terraform would destroy+recreate. Existing http-api / bank-sync
# resources already had count, so no migration needed there.
# Each block maps the pre-count address to index [0] so existing state is
# re-addressed rather than destroyed and recreated.
moved {
  to   = kubernetes_deployment.actualbudget[0]
  from = kubernetes_deployment.actualbudget
}

moved {
  to   = kubernetes_service.actualbudget[0]
  from = kubernetes_service.actualbudget
}

# NOTE(review): the header comment in this file says the http-api resources
# already had count, yet this block migrates the http-api Service; confirm
# which is accurate.
moved {
  to   = kubernetes_service.actualbudget-http-api[0]
  from = kubernetes_service.actualbudget-http-api
}

moved {
  to   = module.ingress[0]
  from = module.ingress
}

142
stacks/actualbudget/main.tf Normal file
View file

@ -0,0 +1,142 @@
# TLS secret name; forwarded to the tls_secret module and to every factory instance.
variable "tls_secret_name" {
  sensitive = true
  type      = string
}

# Forwarded unchanged to every factory instance.
variable "nfs_server" {
  type = string
}
# ExternalSecret that extracts the "actualbudget" key from the vault-kv
# ClusterSecretStore into the "actualbudget-secrets" Secret, refreshed every 15m.
resource "kubernetes_manifest" "external_secret" {
  depends_on = [kubernetes_namespace.actualbudget]

  manifest = {
    kind       = "ExternalSecret"
    apiVersion = "external-secrets.io/v1beta1"

    metadata = {
      namespace = "actualbudget"
      name      = "actualbudget-secrets"
    }

    spec = {
      refreshInterval = "15m"

      secretStoreRef = {
        kind = "ClusterSecretStore"
        name = "vault-kv"
      }

      target = {
        name = "actualbudget-secrets"
      }

      dataFrom = [
        {
          extract = {
            key = "actualbudget"
          }
        }
      ]
    }
  }
}
# Reads back the Secret that the ExternalSecret above targets.
data "kubernetes_secret" "eso_secrets" {
  depends_on = [kubernetes_manifest.external_secret]

  metadata {
    namespace = kubernetes_namespace.actualbudget.metadata[0].name
    name      = "actualbudget-secrets"
  }
}
locals {
  # JSON-decoded "credentials" entry of the secret; indexed by instance name below.
  credentials = jsondecode(
    data.kubernetes_secret.eso_secrets.data["credentials"]
  )
}
# To create a new deployment:
/**
1. Create a subdirectory for {name} under /srv/nfs on the Proxmox host (192.168.1.127)
2. Add {name} as proxied cloudflare route (tfvars)
3. Add module here
*/
# Namespace holding every Actual Budget instance.
resource "kubernetes_namespace" "actualbudget" {
  metadata {
    name = "actualbudget"

    labels = {
      tier               = local.tiers.edge
      "keel.sh/enrolled" = "true"
      "istio-injection"  = "disabled"
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
    ignore_changes = [metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"]]
  }
}
# Calls the shared setup_tls_secret module for the actualbudget namespace.
module "tls_secret" {
  source = "../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.actualbudget.metadata[0].name
}
# https://budget-viktor.viktorbarzin.me/
# Instance "viktor": http-api and bank sync enabled, 4Gi initial storage.
module "viktor" {
  source     = "./factory"
  depends_on = [kubernetes_namespace.actualbudget]

  name         = "viktor"
  tag          = "26.4.0"
  tier         = local.tiers.edge
  storage_size = "4Gi"

  nfs_server      = var.nfs_server
  tls_secret_name = var.tls_secret_name

  enable_http_api  = true
  enable_bank_sync = true

  # Each key falls back to null when it is absent from this user's credentials.
  sync_id                    = lookup(local.credentials["viktor"], "sync_id", null)
  budget_encryption_password = lookup(local.credentials["viktor"], "password", null)

  homepage_annotations = {
    "gethomepage.dev/enabled"      = "true"
    "gethomepage.dev/group"        = "Finance & Personal"
    "gethomepage.dev/name"         = "Budget Viktor"
    "gethomepage.dev/description"  = "Personal budget"
    "gethomepage.dev/icon"         = "actual-budget.png"
    "gethomepage.dev/pod-selector" = ""
  }
}
# https://budget-anca.viktorbarzin.me/
# Instance "anca": http-api and bank sync enabled, default storage size.
module "anca" {
  source     = "./factory"
  depends_on = [kubernetes_namespace.actualbudget]

  name = "anca"
  tag  = "26.4.0"
  tier = local.tiers.edge

  nfs_server      = var.nfs_server
  tls_secret_name = var.tls_secret_name

  enable_http_api  = true
  enable_bank_sync = true

  # Each key falls back to null when it is absent from this user's credentials.
  sync_id                    = lookup(local.credentials["anca"], "sync_id", null)
  budget_encryption_password = lookup(local.credentials["anca"], "password", null)

  homepage_annotations = {
    "gethomepage.dev/enabled"      = "true"
    "gethomepage.dev/group"        = "Finance & Personal"
    "gethomepage.dev/name"         = "Budget Anca"
    "gethomepage.dev/description"  = "Personal budget"
    "gethomepage.dev/icon"         = "actual-budget.png"
    "gethomepage.dev/pod-selector" = ""
  }
}
# https://budget-emo.viktorbarzin.me/
# Disabled 2026-05-13: Emo isn't using this instance. PVC is preserved so
# we can flip enabled back to true to bring the instance back as-was.
# The empty accounts list (vs. anca/viktor) was causing the daily bank-sync
# CronJob to fail and trigger BankSyncStale.
# Instance "emo", currently disabled (enabled = false): the factory keeps only
# the PVC.
module "emo" {
  source     = "./factory"
  depends_on = [kubernetes_namespace.actualbudget]

  name = "emo"
  tag  = "26.4.0"
  tier = local.tiers.edge

  nfs_server      = var.nfs_server
  tls_secret_name = var.tls_secret_name

  enabled          = false
  enable_http_api  = false
  enable_bank_sync = false

  # try() instead of lookup(local.credentials["emo"], ...): this instance is
  # disabled, so its credentials entry may be removed entirely. A missing
  # "emo" key then yields null instead of failing the plan for the whole stack.
  budget_encryption_password = try(local.credentials["emo"]["password"], null)
  sync_id                    = try(local.credentials["emo"]["sync_id"], null)

  homepage_annotations = {}
}

View file

@ -0,0 +1,37 @@
# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa
terraform {
required_providers {
vault = {
source = "hashicorp/vault"
version = "~> 4.0"
}
cloudflare = {
source = "cloudflare/cloudflare"
version = "~> 4"
}
authentik = {
source = "goauthentik/authentik"
version = "~> 2024.10"
}
}
}
variable "kube_config_path" {
type = string
default = "~/.kube/config"
}
provider "kubernetes" {
config_path = var.kube_config_path
}
provider "helm" {
kubernetes = {
config_path = var.kube_config_path
}
}
provider "vault" {
address = "https://vault.viktorbarzin.me"
skip_child_token = true
}

1
stacks/actualbudget/secrets Symbolic link
View file

@ -0,0 +1 @@
../../secrets

View file

@ -0,0 +1,13 @@
# Pull in the root Terragrunt configuration found by walking up the parent folders.
include "root" {
  path = find_in_parent_folders()
}

# Depends on the platform stack; skip_outputs is set, so its outputs are not read.
dependency "platform" {
  skip_outputs = true
  config_path  = "../platform"
}

# Depends on the vault stack; skip_outputs is set, so its outputs are not read.
dependency "vault" {
  skip_outputs = true
  config_path  = "../vault"
}

81
stacks/affine/.terraform.lock.hcl generated Normal file
View file

@ -0,0 +1,81 @@
# This file is maintained automatically by "terraform init".
# Manual edits may be lost in future updates.
provider "registry.terraform.io/cloudflare/cloudflare" {
version = "4.52.7"
constraints = "~> 4.0"
hashes = [
"h1:pPItIWii5oymR+geZB219ROSPuSODPLTlM4S/u8xLvM=",
"zh:0c904ce31a4c6c4a5b3bf7ff1560e77c0cc7e2450c8553ded8e8c90398e1418b",
"zh:36183d310c36373fe4cb936b83c595c6fd3b0a94bc7827f28e5789ccbf59752e",
"zh:556a568a6f0235e8f41647de9e4d3a1e7b1d6502df8b19b54ec441f1c653ea10",
"zh:633ebbd5b0245e75e500ef9be4d9e62288f97e8da3baaa51323892a786d90285",
"zh:6acfe60cf52a65ba8f044f748548d2119e7f4fd7f8ebcb14698960d87c68f529",
"zh:890df766e9b839623b1f0437355032a3c006226a6c200cd911e15ee1a9014e9f",
"zh:904acc31ebb9d6ef68c792074b30532ee61bf515f19e0a3c75b46f126cca1f13",
"zh:a1d0a81246afc8750286d3f6fe7a8fbe6460dd2662407b28dbfbabb612e5fa9d",
"zh:a41a36fe253fc365fe2b7ffc749624688b2693b4634862fda161179ab100029f",
"zh:a7ef269e77ffa8715c8945a2c14322c7ff159ea44c15f62505f3cbb2cae3b32d",
"zh:b01aa3bed30610633b762df64332b26f8844a68c3960cebcb30f04918efc67fe",
"zh:b069cc2cd18cae10757df3ae030508eac8d55de7e49eda7a5e3e11f2f7fe6455",
"zh:b2d2c6313729ebb7465dceece374049e2d08bda34473901be9ff46a8836d42b2",
"zh:db0e114edaf4bc2f3d4769958807c83022bfbc619a00bdf4c4bd17faa4ab2d8b",
"zh:ecc0aa8b9044f664fd2aaf8fa992d976578f78478980555b4b8f6148e8d1a5fe",
]
}
provider "registry.terraform.io/goauthentik/authentik" {
version = "2024.12.1"
constraints = "~> 2024.10"
hashes = [
"h1:roBMd+gi+TGgikH/bMzEI8JfvJiMAQWt+8FmokCrQIs=",
]
}
provider "registry.terraform.io/hashicorp/helm" {
version = "3.1.1"
hashes = [
"h1:47CqNwkxctJtL/N/JuEj+8QMg8mRNI/NWeKO5/ydfZU=",
"h1:5b2ojWKT0noujHiweCds37ZreRFRQLNaErdJLusJN88=",
"zh:1a6d5ce931708aec29d1f3d9e360c2a0c35ba5a54d03eeaff0ce3ca597cd0275",
"zh:3411919ba2a5941801e677f0fea08bdd0ae22ba3c9ce3309f55554699e06524a",
"zh:81b36138b8f2320dc7f877b50f9e38f4bc614affe68de885d322629dd0d16a29",
"zh:95a2a0a497a6082ee06f95b38bd0f0d6924a65722892a856cfd914c0d117f104",
"zh:9d3e78c2d1bb46508b972210ad706dd8c8b106f8b206ecf096cd211c54f46990",
"zh:a79139abf687387a6efdbbb04289a0a8e7eaca2bd91cdc0ce68ea4f3286c2c34",
"zh:aaa8784be125fbd50c48d84d6e171d3fb6ef84a221dbc5165c067ce05faab4c8",
"zh:afecd301f469975c9d8f350cc482fe656e082b6ab0f677d1a816c3c615837cc1",
"zh:c54c22b18d48ff9053d899d178d9ffef7d9d19785d9bf310a07d648b7aac075b",
"zh:db2eefd55aea48e73384a555c72bac3f7d428e24147bedb64e1a039398e5b903",
"zh:ee61666a233533fd2be971091cecc01650561f1585783c381b6f6e8a390198a4",
"zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c",
]
}
provider "registry.terraform.io/hashicorp/kubernetes" {
version = "3.1.0"
hashes = [
"h1:oodIAuFMikXNmEtil5MQgP4dfSctUBYQiGJfjbsF3NY=",
]
}
provider "registry.terraform.io/hashicorp/vault" {
version = "4.8.0"
constraints = "~> 4.0"
hashes = [
"h1:GPfhH6dr1LY0foPBDYv9bEGifx7eSwYqFcEAOWOUxLk=",
"h1:aHqgWQhDBMeZO9iUKwJYMlh4q+xNMUlMIcjRbF4d02Y=",
"zh:269ab13433f67684012ae7e15876532b0312f5d0d2002a9cf9febb1279ce5ea6",
"zh:4babc95bf0c40eb85005db1dc2ca403c46be4a71dd3e409db3711a56f7a5ca0e",
"zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3",
"zh:86e27c1c625ecc24446a11eeffc3ac319b36c2b4e51251db8579256a0dbcf136",
"zh:a32f31da94824009e26b077374440b52098aecb93c92ff55dc3d31dd37c4ea25",
"zh:be0a18c6c0425518bab4fbffd82078b82036a88503b5d76064de551c9f646cbf",
"zh:be5a77fdfd36863ebeec79cd12b1d13322ffad6821d157a0b279789fa06b5937",
"zh:be8317d142a3caad74c7d936039ae27076a1b2b8312ef5208e2871a5f525977c",
"zh:c94a84895a3d9954b80e983eed4603330a5cdbbd8eef5b3c99278c2d1402ef3c",
"zh:de1fb712784dd8415f011ca5346a34f87fab6046c730557615247e511dbc7d98",
"zh:e3eafae7da550f86cae395d6660b2a0e93ec8d2b0e0e5ef982ec762e961fc952",
"zh:ff35fb1ab6add288f0f368981e56f780b50405accd1937131cba1137999c8d83",
]
}

390
stacks/affine/main.tf Normal file
View file

@ -0,0 +1,390 @@
# TLS secret name; forwarded to the tls_secret module.
variable "tls_secret_name" {
  sensitive = true
  type      = string
}

variable "nfs_server" {
  type = string
}
# ExternalSecret that extracts the "affine" key from the vault-kv
# ClusterSecretStore into the "affine-secrets" Secret, refreshed every 15m.
resource "kubernetes_manifest" "external_secret" {
  depends_on = [kubernetes_namespace.affine]

  manifest = {
    kind       = "ExternalSecret"
    apiVersion = "external-secrets.io/v1beta1"

    metadata = {
      namespace = "affine"
      name      = "affine-secrets"
    }

    spec = {
      refreshInterval = "15m"

      secretStoreRef = {
        kind = "ClusterSecretStore"
        name = "vault-kv"
      }

      target = {
        name = "affine-secrets"
      }

      dataFrom = [
        {
          extract = {
            key = "affine"
          }
        }
      ]
    }
  }
}
# Reads back the Secret that the "affine-secrets" ExternalSecret targets.
data "kubernetes_secret" "eso_secrets" {
  depends_on = [kubernetes_manifest.external_secret]

  metadata {
    namespace = kubernetes_namespace.affine.metadata[0].name
    name      = "affine-secrets"
  }
}
# DB credentials from Vault database engine (rotated automatically)
# Provides DATABASE_URL that auto-updates when password rotates
# ExternalSecret that reads the "password" property of static-creds/pg-affine
# from the vault-database ClusterSecretStore and templates it into a
# DATABASE_URL entry in the "affine-db-creds" Secret.
resource "kubernetes_manifest" "db_external_secret" {
  depends_on = [kubernetes_namespace.affine]

  manifest = {
    kind       = "ExternalSecret"
    apiVersion = "external-secrets.io/v1beta1"

    metadata = {
      namespace = "affine"
      name      = "affine-db-creds"
    }

    spec = {
      refreshInterval = "15m"

      secretStoreRef = {
        kind = "ClusterSecretStore"
        name = "vault-database"
      }

      # The fetched value is exposed to the template below as `.password`.
      data = [
        {
          secretKey = "password"
          remoteRef = {
            property = "password"
            key      = "static-creds/pg-affine"
          }
        }
      ]

      target = {
        name = "affine-db-creds"
        template = {
          data = {
            DATABASE_URL = "postgresql://affine:{{ .password }}@${var.postgresql_host}:5432/affine"
          }
        }
      }
    }
  }
}
locals {
  # JSON-decoded "mailserver_accounts" entry of the affine secret; indexed by
  # mailbox address in common_env.
  mailserver_accounts = jsondecode(
    data.kubernetes_secret.eso_secrets.data["mailserver_accounts"]
  )
}
# Used as REDIS_SERVER_HOST in common_env.
variable "redis_host" {
  type = string
}

# Host part of the templated DATABASE_URL.
variable "postgresql_host" {
  type = string
}

# Used as MAILER_HOST in common_env.
variable "mail_host" {
  type = string
}
# Namespace for the AFFiNE stack.
resource "kubernetes_namespace" "affine" {
  metadata {
    name = "affine"

    labels = {
      "keel.sh/enrolled" = "true"
      tier               = local.tiers.aux
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
    ignore_changes = [metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"]]
  }
}
# Calls the shared setup_tls_secret module for the affine namespace.
module "tls_secret" {
  source = "../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.affine.metadata[0].name
}
# Environment variables shared by the migration init container and the main
# AFFiNE container; both iterate this list with a dynamic "env" block.
locals {
  common_env = [
    # --- Runtime ---
    { name = "REDIS_SERVER_HOST", value = var.redis_host },
    { name = "AFFINE_INDEXER_ENABLED", value = "false" },
    # NOTE(review): a 4096 MB V8 old-space ceiling is well above the 512Mi
    # memory limit both containers declare; confirm which of the two is intended.
    { name = "NODE_OPTIONS", value = "--max-old-space-size=4096" },

    # --- Server URL configuration ---
    { name = "AFFINE_SERVER_EXTERNAL_URL", value = "https://affine.viktorbarzin.me" },
    { name = "AFFINE_SERVER_HTTPS", value = "true" },

    # --- Email/SMTP configuration ---
    { name = "MAILER_HOST", value = var.mail_host },
    { name = "MAILER_PORT", value = "587" },
    { name = "MAILER_USER", value = "info@viktorbarzin.me" },
    # NOTE(review): the password is passed as a literal env value and therefore
    # appears in the Deployment spec; a secret_key_ref would keep it out.
    { name = "MAILER_PASSWORD", value = local.mailserver_accounts["info@viktorbarzin.me"] },
    { name = "MAILER_SENDER", value = "AFFiNE <info@viktorbarzin.me>" },
  ]
}
# Encrypted volume backing AFFiNE's storage and config directories. It starts
# at 1Gi; the resize.topolvm.io annotations let the autoresizer grow it.
resource "kubernetes_persistent_volume_claim" "data_encrypted" {
  # Do not block the apply waiting for the claim to bind.
  wait_until_bound = false

  metadata {
    name      = "affine-data-encrypted"
    namespace = kubernetes_namespace.affine.metadata[0].name

    annotations = {
      "resize.topolvm.io/threshold"     = "10%"
      "resize.topolvm.io/increase"      = "100%"
      "resize.topolvm.io/storage_limit" = "5Gi"
    }
  }

  spec {
    storage_class_name = "proxmox-lvm-encrypted"
    access_modes       = ["ReadWriteOnce"]

    resources {
      requests = {
        storage = "1Gi"
      }
    }
  }

  lifecycle {
    # The autoresizer raises requests.storage (up to storage_limit) and a PVC
    # cannot shrink. If Terraform tracked this field, every apply would try to
    # restore the declared size, Kubernetes would reject the shrink, and the
    # PVC would be left Terminating while still in use.
    ignore_changes = [spec[0].resources[0].requests]
  }
}
# AFFiNE server Deployment: a single replica, replaced with the Recreate
# strategy. A migration init container runs first, then the server container;
# both share local.common_env, the DATABASE_URL Secret reference and the two
# sub-path mounts of the encrypted data volume.
resource "kubernetes_deployment" "affine" {
  metadata {
    name      = "affine"
    namespace = kubernetes_namespace.affine.metadata[0].name

    annotations = {
      "reloader.stakater.com/auto" = "true"
    }

    labels = {
      app  = "affine"
      tier = local.tiers.aux
    }
  }

  spec {
    replicas = 1

    strategy {
      type = "Recreate"
    }

    selector {
      match_labels = {
        app = "affine"
      }
    }

    template {
      metadata {
        labels = {
          app = "affine"
        }

        annotations = {
          "diun.enable"                    = "true"
          "diun.include_tags"              = "^\\d+\\.\\d+\\.\\d+$"
          "dependency.kyverno.io/wait-for" = "postgresql.dbaas:5432,redis-master.redis:6379"
        }
      }

      spec {
        # Schema migrations (prisma migrate deploy, then the server's own
        # "run" script) must complete before the server container starts.
        init_container {
          name    = "migration"
          image   = "ghcr.io/toeverything/affine:0.26.6"
          command = ["sh", "-c", "npx prisma migrate deploy && SERVER_FLAVOR=script node ./dist/main.js run"]

          dynamic "env" {
            for_each = local.common_env
            content {
              name  = env.value.name
              value = env.value.value
            }
          }

          # Connection string rendered by the affine-db-creds ExternalSecret.
          env {
            name = "DATABASE_URL"
            value_from {
              secret_key_ref {
                name = "affine-db-creds"
                key  = "DATABASE_URL"
              }
            }
          }

          volume_mount {
            name       = "data"
            mount_path = "/root/.affine/storage"
            sub_path   = "storage"
          }

          volume_mount {
            name       = "data"
            mount_path = "/root/.affine/config"
            sub_path   = "config"
          }

          resources {
            requests = {
              cpu    = "50m"
              memory = "128Mi"
            }
            limits = {
              memory = "512Mi"
            }
          }
        }

        # Main server container; listens on 3010 (the Service maps 80 -> 3010).
        container {
          name  = "affine"
          image = "ghcr.io/toeverything/affine:0.26.6"

          port {
            container_port = 3010
          }

          dynamic "env" {
            for_each = local.common_env
            content {
              name  = env.value.name
              value = env.value.value
            }
          }

          env {
            name = "DATABASE_URL"
            value_from {
              secret_key_ref {
                name = "affine-db-creds"
                key  = "DATABASE_URL"
              }
            }
          }

          volume_mount {
            name       = "data"
            mount_path = "/root/.affine/storage"
            sub_path   = "storage"
          }

          volume_mount {
            name       = "data"
            mount_path = "/root/.affine/config"
            sub_path   = "config"
          }

          resources {
            requests = {
              cpu    = "50m"
              memory = "128Mi"
            }
            limits = {
              memory = "512Mi"
            }
          }

          # Both probes poll GET /info; readiness starts earlier and polls
          # more often than liveness.
          readiness_probe {
            http_get {
              path = "/info"
              port = 3010
            }
            initial_delay_seconds = 60
            period_seconds        = 10
            timeout_seconds       = 5
          }

          liveness_probe {
            http_get {
              path = "/info"
              port = 3010
            }
            initial_delay_seconds = 120
            period_seconds        = 30
            timeout_seconds       = 10
          }
        }

        volume {
          name = "data"
          persistent_volume_claim {
            claim_name = kubernetes_persistent_volume_claim.data_encrypted.metadata[0].name
          }
        }
      }
    }
  }

  # Fields listed here are changed in-cluster (Kyverno, Keel, the Deployment
  # controller), so Terraform must not try to revert them.
  lifecycle {
    ignore_changes = [
      spec[0].template[0].spec[0].dns_config, # KYVERNO_LIFECYCLE_V1
      metadata[0].annotations["keel.sh/policy"],
      metadata[0].annotations["keel.sh/trigger"],
      metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2
      metadata[0].annotations["keel.sh/match-tag"],
      spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE — Keel manages tag updates
      spec[0].template[0].spec[0].init_container[0].image,
      metadata[0].annotations["kubernetes.io/change-cause"],
      metadata[0].annotations["deployment.kubernetes.io/revision"],
      spec[0].template[0].metadata[0].annotations["keel.sh/update-time"], # KEEL_LIFECYCLE_V1
    ]
  }
}
# Service in front of the AFFiNE pods: port 80 ("http") forwards to the
# container's port 3010.
resource "kubernetes_service" "affine" {
  metadata {
    name      = "affine"
    namespace = kubernetes_namespace.affine.metadata[0].name

    labels = {
      app = "affine"
    }
  }

  spec {
    selector = {
      app = "affine"
    }

    port {
      name        = "http"
      port        = 80
      target_port = 3010
    }
  }
}
# Public ingress for AFFiNE, built by the shared ingress_factory module.
module "ingress" {
  source = "../../modules/kubernetes/ingress_factory"

  name            = "affine"
  namespace       = kubernetes_namespace.affine.metadata[0].name
  tls_secret_name = var.tls_secret_name
  dns_type        = "non-proxied"
  max_body_size   = "500m"

  # auth = "app": AFFiNE has its own workspace auth + bearer-token API
  # used by desktop/mobile sync clients. Authentik forward-auth was 302-ing
  # those API callers; AFFiNE's own auth gates users.
  auth = "app"

  # Homepage dashboard tile.
  extra_annotations = {
    "gethomepage.dev/enabled"      = "true"
    "gethomepage.dev/name"         = "Affine"
    "gethomepage.dev/description"  = "Knowledge base"
    "gethomepage.dev/icon"         = "affine.png"
    "gethomepage.dev/group"        = "Other"
    "gethomepage.dev/pod-selector" = ""
  }
}

View file

@ -0,0 +1,37 @@
# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa
# Provider requirements and provider configuration for this stack.
# NOTE(review): the file carries Terragrunt's "Generated by" signature, so manual
# edits are presumably overwritten on the next run — change the generating
# Terragrunt configuration instead.
# NOTE(review): kubernetes and helm are configured below but have no
# required_providers entry in this file; confirm their versions are pinned elsewhere.
terraform {
required_providers {
vault = {
source = "hashicorp/vault"
version = "~> 4.0"
}
cloudflare = {
source = "cloudflare/cloudflare"
version = "~> 4"
}
authentik = {
source = "goauthentik/authentik"
version = "~> 2024.10"
}
}
}
# Path of the kubeconfig used by both the kubernetes and helm providers.
variable "kube_config_path" {
type = string
default = "~/.kube/config"
}
provider "kubernetes" {
config_path = var.kube_config_path
}
provider "helm" {
kubernetes = {
config_path = var.kube_config_path
}
}
# Vault is reached at a fixed address; skip_child_token is enabled.
provider "vault" {
address = "https://vault.viktorbarzin.me"
skip_child_token = true
}

1
stacks/affine/secrets Symbolic link
View file

@ -0,0 +1 @@
../../secrets

View file

@ -0,0 +1,13 @@
# Inherit the shared Terragrunt configuration from the nearest parent folder.
include "root" {
  path = find_in_parent_folders()
}

# Ordering-only dependencies: the platform and vault stacks are declared as
# dependencies, but none of their outputs are read (skip_outputs).
dependency "platform" {
  skip_outputs = true
  config_path  = "../platform"
}

dependency "vault" {
  skip_outputs = true
  config_path  = "../vault"
}

View file

@ -0,0 +1,70 @@
# Catch-all forward-auth restriction: gate the admin-only hostnames to the
# "Home Server Admins" group. Bound to the "Domain wide catch all" application
# (binding stays UI-managed; only the expression is adopted here).
#
# Adopted into Terraform 2026-06-04 to add a carve-out: the Kubernetes Dashboard
# (k8s.viktorbarzin.me) ALSO admits the kubernetes-* RBAC groups, so
# namespace-owners (e.g. gheorghe) can reach the dashboard login page. The
# dashboard itself enforces per-namespace access via the pasted ServiceAccount
# token (stacks/rbac/modules/rbac/dashboard-sa.tf) — this policy only controls
# who reaches the page. All other admin-only hosts remain Home-Server-Admins-only.
import {
  to = authentik_policy_expression.admin_services_restriction
  id = "07a11b85-8f37-4844-aebb-ac9c112ec87c"
}

resource "authentik_policy_expression" "admin_services_restriction" {
  name = "admin-services-restriction"

  # The heredoc body is Python. Its block indentation is significant: every
  # `return` that belongs to an `if` must be nested under it, otherwise the
  # expression fails to compile. `<<-` strips only the common leading
  # indentation, so the nesting below survives.
  expression = trimspace(<<-EOT
    ADMIN_ONLY_HOSTS = {
        "terminal.viktorbarzin.me",
        "frigate.viktorbarzin.me",
        "netbox.viktorbarzin.me",
        "trading.viktorbarzin.me",
        "speedtest.viktorbarzin.me",
        "meshcentral.viktorbarzin.me",
        "k8s.viktorbarzin.me",
        "dashy.viktorbarzin.me",
        "prowlarr.viktorbarzin.me",
        "qbittorrent.viktorbarzin.me",
        "listenarr.viktorbarzin.me",
        "shlink.viktorbarzin.me",
        "openclaw.viktorbarzin.me",
        "openlobster.viktorbarzin.me",
        "wealthfolio.viktorbarzin.me",
    }
    ADMIN_GROUP = "Home Server Admins"
    # The K8s Dashboard additionally admits the Kubernetes RBAC groups. Access
    # to the page is not the security boundary the pasted ServiceAccount token
    # is (per-namespace admin + cluster read-only). See dashboard-sa.tf.
    K8S_DASHBOARD_HOST = "k8s.viktorbarzin.me"
    K8S_DASHBOARD_GROUPS = [
        "Home Server Admins",
        "kubernetes-admins",
        "kubernetes-power-users",
        "kubernetes-namespace-owners",
    ]
    host = request.context.get("host", "")
    # t3 Workstation edge gate: only members of "T3 Users" may reach t3.
    # Placed BEFORE the ADMIN_ONLY_HOSTS early-return (t3 is intentionally not in
    # that set it must not require Home-Server-Admins, just T3 Users membership).
    if host == "t3.viktorbarzin.me":
        return ak_is_group_member(request.user, name="T3 Users")
    # Not an admin-only host: allow any authenticated user.
    if host not in ADMIN_ONLY_HOSTS:
        return True
    # K8s Dashboard: allow admins OR any Kubernetes RBAC group.
    if host == K8S_DASHBOARD_HOST:
        return any(ak_is_group_member(request.user, name=g) for g in K8S_DASHBOARD_GROUPS)
    # Every other admin-only host: Home Server Admins only.
    return ak_is_group_member(request.user, name=ADMIN_GROUP)
  EOT
  )
}

View file

@ -0,0 +1,200 @@
# goauthentik/authentik Terraform provider.
#
# Adopted 2026-04-18 (Wave 6a of the state-drift consolidation plan) so the
# catch-all Proxy Provider, until then managed only through the Authentik UI,
# is under Terraform management. The API token is stored in Vault at
# `secret/authentik/tf_api_token` (token identifier `terraform-infra-stack`,
# intent API, user akadmin, no expiry). The required_providers declaration
# lives in the central terragrunt.hcl so every stack has it available; this is
# the only stack that configures a provider block.
data "vault_kv_secret_v2" "authentik_tf" {
  mount = "secret"
  name  = "authentik"
}

provider "authentik" {
  url   = "https://authentik.viktorbarzin.me"
  token = data.vault_kv_secret_v2.authentik_tf.data["tf_api_token"]
}
# Look the two stock flows up by slug; the proxy providers reference the
# resulting IDs instead of hard-coding them.
data "authentik_flow" "default_authorization_implicit_consent" {
  slug = "default-provider-authorization-implicit-consent"
}

data "authentik_flow" "default_provider_invalidation" {
  slug = "default-provider-invalidation-flow"
}
# -----------------------------------------------------------------------------
# Catch-all Proxy Provider + Application.
#
# Created via the Authentik UI ~a year ago; adopted into Terraform 2026-04-18
# (Wave 6a). The proxy provider is consumed by the embedded outpost
# (uuid 0eecac07-97c7-443c-8925-05f2f4fe3e47) via an outpost-level binding
# that stays in the UI — it's a single toggle with no drift risk.
# -----------------------------------------------------------------------------
resource "authentik_application" "catchall" {
  name              = "Domain wide catch all"
  slug              = "domain-wide-catch-all"
  protocol_provider = authentik_provider_proxy.catchall.id

  # Presentation and policy-engine settings are left out of Terraform's control.
  lifecycle {
    ignore_changes = [
      meta_description,
      meta_launch_url,
      meta_icon,
      group,
      backchannel_providers,
      policy_engine_mode,
      open_in_new_tab,
    ]
  }
}
# Forward-auth (domain-level) proxy provider behind the catch-all application.
resource "authentik_provider_proxy" "catchall" {
  name          = "Provider for Domain wide catch all"
  mode          = "forward_domain"
  external_host = "https://authentik.viktorbarzin.me"
  cookie_domain = "viktorbarzin.me"

  # Flow UUIDs are resolved through data sources, so re-creating a flow under
  # the same slug needs no HCL edit.
  authorization_flow = data.authentik_flow.default_authorization_implicit_consent.id
  invalidation_flow  = data.authentik_flow.default_provider_invalidation.id

  # Cookie / proxysession TTL. Drives `Max-Age` on `authentik_proxy_*`
  # cookies and the `expires` column in `authentik_providers_proxy_proxysession`.
  # See the note on the embedded outpost — bumping this requires an outpost
  # pod restart for the gorilla session store to rebind.
  access_token_validity = "weeks=4"

  lifecycle {
    ignore_changes = [
      property_mappings,
      jwt_federation_sources,
      skip_path_regex,
      internal_host,
      basic_auth_enabled,
      basic_auth_password_attribute,
      basic_auth_username_attribute,
      intercept_header_auth,
    ]
  }
}
# -----------------------------------------------------------------------------
# Embedded outpost record. Adopted into Terraform 2026-05-10 as part of the
# postgres-session-backend fix:
#   - `managed` is set server-side to `goauthentik.io/outposts/embedded` so
#     the outpost binary's `IsEmbedded()` check returns true and it loads the
#     PostgreSQL session backend (PR #16628). The Terraform provider does
#     NOT expose `managed` in the schema, so the field is preserved across
#     applies (TF only writes fields it knows about).
#   - kubernetes_json_patches.deployment carries:
#       * dshm 2Gi tmpfs (covers the 2026-04-18 ENOSPC class of issues)
#       * resources requests/limits
#       * `app.kubernetes.io/component=server` pod label so the K8s service
#         selector lights up endpoints (works around goauthentik 2026.2.2
#         service.py:52 selector mismatch on standalone embedded outposts).
#       * AUTHENTIK_POSTGRESQL__{HOST,PORT,USER,PASSWORD,NAME} env vars read
#         from the shared `goauthentik` Secret so the postgres session backend
#         has credentials to connect to the dbaas cluster.
#   - kubernetes_json_patches.service replaces the controller-set selector
#     (which incorrectly targets `app.kubernetes.io/name=authentik`, i.e.
#     the goauthentik-server pods) with the outpost's own labels.
# -----------------------------------------------------------------------------
resource "authentik_outpost" "embedded" {
  name               = "authentik Embedded Outpost"
  type               = "proxy"
  service_connection = "99e227a7-4562-4888-9660-4c27da678c50"
  protocol_providers = [authentik_provider_proxy.catchall.id]

  config = jsonencode({
    # NOTE(review): "trace" is the most verbose level (the public outpost runs
    # at "info"); confirm this is still wanted outside of debugging.
    log_level                        = "trace"
    docker_labels                    = null
    authentik_host                   = "https://authentik.viktorbarzin.me/"
    docker_network                   = null
    container_image                  = null
    docker_map_ports                 = true
    refresh_interval                 = "minutes=5"
    kubernetes_replicas              = 1
    kubernetes_namespace             = "authentik"
    authentik_host_browser           = ""
    object_naming_template           = "ak-outpost-%(name)s"
    authentik_host_insecure          = false
    kubernetes_service_type          = "ClusterIP"
    kubernetes_ingress_path_type     = null
    kubernetes_image_pull_secrets    = []
    kubernetes_ingress_class_name    = null
    kubernetes_disabled_components   = []
    kubernetes_ingress_annotations   = {}
    kubernetes_ingress_secret_name   = "authentik-outpost-tls"
    kubernetes_httproute_annotations = {}
    kubernetes_httproute_parent_refs = []

    kubernetes_json_patches = {
      deployment = [
        # Memory-backed /dev/shm volume and its mount.
        {
          op    = "add"
          path  = "/spec/template/spec/volumes"
          value = [{ name = "dshm", emptyDir = { medium = "Memory", sizeLimit = "2Gi" } }]
        },
        {
          op    = "add"
          path  = "/spec/template/spec/containers/0/volumeMounts"
          value = [{ name = "dshm", mountPath = "/dev/shm" }]
        },
        # Container resources.
        {
          op    = "add"
          path  = "/spec/template/spec/containers/0/resources"
          value = { limits = { memory = "2560Mi" }, requests = { cpu = "100m", memory = "128Mi" } }
        },
        # Pod label the Service selector depends on ("~1" is the JSON-Pointer
        # escape for "/").
        {
          op    = "add"
          path  = "/spec/template/metadata/labels/app.kubernetes.io~1component"
          value = "server"
        },
        # PostgreSQL connection settings, each appended to the env list and
        # read from the `goauthentik` Secret.
        {
          op    = "add"
          path  = "/spec/template/spec/containers/0/env/-"
          value = { name = "AUTHENTIK_POSTGRESQL__HOST", valueFrom = { secretKeyRef = { name = "goauthentik", key = "AUTHENTIK_POSTGRESQL__HOST" } } }
        },
        {
          op    = "add"
          path  = "/spec/template/spec/containers/0/env/-"
          value = { name = "AUTHENTIK_POSTGRESQL__PORT", valueFrom = { secretKeyRef = { name = "goauthentik", key = "AUTHENTIK_POSTGRESQL__PORT" } } }
        },
        {
          op    = "add"
          path  = "/spec/template/spec/containers/0/env/-"
          value = { name = "AUTHENTIK_POSTGRESQL__USER", valueFrom = { secretKeyRef = { name = "goauthentik", key = "AUTHENTIK_POSTGRESQL__USER" } } }
        },
        {
          op    = "add"
          path  = "/spec/template/spec/containers/0/env/-"
          value = { name = "AUTHENTIK_POSTGRESQL__PASSWORD", valueFrom = { secretKeyRef = { name = "goauthentik", key = "AUTHENTIK_POSTGRESQL__PASSWORD" } } }
        },
        {
          op    = "add"
          path  = "/spec/template/spec/containers/0/env/-"
          value = { name = "AUTHENTIK_POSTGRESQL__NAME", valueFrom = { secretKeyRef = { name = "goauthentik", key = "AUTHENTIK_POSTGRESQL__NAME" } } }
        },
      ]

      service = [
        # Point the Service at the outpost's own pods.
        {
          op   = "replace"
          path = "/spec/selector"
          value = {
            "app.kubernetes.io/managed-by" = "goauthentik.io"
            "app.kubernetes.io/name"       = "authentik-outpost-proxy"
            "goauthentik.io/outpost-name"  = "authentik-embedded-outpost"
            "goauthentik.io/outpost-type"  = "proxy"
            "goauthentik.io/outpost-uuid"  = "0eecac0797c7443c892505f2f4fe3e47"
          }
        },
      ]
    }
  })
}
# -----------------------------------------------------------------------------
# Default User Login stage bound to default-authentication-flow.
# Adopted into Terraform 2026-05-01 to set session_duration=weeks=4 so users
# stay logged in across browser restarts. There is no Brand.session_duration
# in authentik 2026.2.x — UserLoginStage is the correct knob.
# -----------------------------------------------------------------------------
resource "authentik_stage_user_login" "default_login" {
  name             = "default-authentication-login"
  session_duration = "weeks=4"

  lifecycle {
    # Only session_duration is pinned here. The remaining settings stay
    # UI-managed so plans do not churn on unrelated knobs (for example
    # remember_me_offset toggles).
    ignore_changes = [
      remember_me_offset,
      terminate_other_sessions,
      geoip_binding,
      network_binding,
    ]
  }
}

217
stacks/authentik/guest.tf Normal file
View file

@ -0,0 +1,217 @@
# =============================================================================
# Public Guest user + auto-login flow + public proxy provider + dedicated
# outpost.
#
# Backs the `auth = "public"` tier of the ingress_factory module. Architecture:
#
# * `guest` user (in `Public Guests` group, NOT `Allow Login Users`).
# * `public-auto-login` flow: anonymous user enters → expression policy sets
# `pending_user = guest` → user_login stage logs them in. No UI shown.
# * `Provider for Public` proxy provider (forward_domain, cookie_domain
# `viktorbarzin.me`) with `authentication_flow = public-auto-login`.
# * Dedicated `Public Outpost` Deployment+Service (managed by Authentik's
# K8s controller). Bound to the public provider only — there is no other
# provider claiming `viktorbarzin.me` on this outpost, so every request
# it sees runs the public flow regardless of host.
# * `public-auth.viktorbarzin.me` ingress exposes the public outpost's
# `/outpost.goauthentik.io/*` path so OAuth callbacks land on it (the
# embedded outpost doesn't know about the public provider, so callbacks
# can't go to authentik.viktorbarzin.me).
#
# Traffic flow for a stranger hitting an `auth = "public"` ingress:
# 1. Traefik's `authentik-forward-auth-public` middleware → public outpost.
# 2. No session cookie → 302 to `https://authentik.viktorbarzin.me/...`
# with redirect_uri = `https://public-auth.viktorbarzin.me/.../callback`.
# 3. Authentik runs `public-auto-login` (no UI), issues session.
# 4. 302 → public-auth.viktorbarzin.me callback → public outpost validates
# state and sets `authentik_proxy_<public-hash>` cookie on `viktorbarzin.me`.
# 5. 302 → original URL → Traefik retries forward_auth → public outpost
# validates cookie → 200 with `X-authentik-username: guest`.
#
# A user already logged into anything else on viktorbarzin.me (the catchall)
# still gets recognised here — Authentik prefers an existing session and the
# public provider's authorization_flow auto-approves anyone, so their real
# username shows up in `X-authentik-username`. Strangers get `guest`.
# =============================================================================
# Shared account that anonymous visitors are logged in as by the public flow.
resource "authentik_user" "guest" {
  username  = "guest"
  name      = "Guest"
  type      = "internal"
  path      = "users/system"
  is_active = true

  # Deliberately passwordless: the user_login stage in `public_auto_login`
  # logs the request in using the pending_user pre-set by an expression
  # policy. There is no UI path for `guest` to authenticate with a password —
  # the user is also kept out of `Allow Login Users`, so even a leaked
  # password could not complete the standard login flow.
  lifecycle {
    ignore_changes = [attributes, email]
  }
}
# Group whose only member is the guest account.
resource "authentik_group" "public_guests" {
  name  = "Public Guests"
  users = [authentik_user.guest.id]

  # Intentionally NOT a child of "Allow Login Users" — that keeps a
  # hypothetical leaked password from promoting `guest` to a real user via
  # the standard login flow.
}
# Pre-stage policy: sets pending_user = guest before the user_login stage runs.
# It mutates `request.context["flow_plan"].context["pending_user"]` — the
# canonical pattern, because the user_login stage reads pending_user from
# `flow_plan.context`. Writing to `request.context["pending_user"]` directly
# does not propagate: the policy's request.context is not the same dict as
# flow_plan.context.
resource "authentik_policy_expression" "set_guest_user" {
  name = "set-public-guest-user"

  expression = trimspace(<<-EOT
    request.context["flow_plan"].context["pending_user"] = ak_user_by(username="guest")
    return True
  EOT
  )
}
# user_login stage used only by the public flow. Its 4-week session matches
# the default authentication stage, so a stranger goes through the auto-bind
# roughly once a month per device.
resource "authentik_stage_user_login" "public_guest_login" {
  session_duration = "weeks=4"
  name             = "public-guest-login"
}
# `authentication = "none"` lets anonymous requests run the flow.
# `designation = "authentication"` because the outcome of the flow is "the
# request is now authenticated as guest"; the public proxy provider's
# authorization_flow then runs implicit consent.
resource "authentik_flow" "public_auto_login" {
  slug           = "public-auto-login"
  name           = "Public Auto Login"
  title          = "Public Guest Login"
  authentication = "none"
  designation    = "authentication"
}
# Attaches the guest login stage to the public auto-login flow.
resource "authentik_flow_stage_binding" "public_login" {
  target = authentik_flow.public_auto_login.uuid
  stage  = authentik_stage_user_login.public_guest_login.id
  order  = 10

  # Evaluate the bound policy at stage runtime rather than at plan time. At
  # plan time flow_plan may not yet be in request.context, so the expression
  # policy's mutation would no-op. With evaluate_on_plan=false +
  # re_evaluate_policies=true the policy fires right before the stage runs,
  # when flow_plan is fully populated.
  evaluate_on_plan     = false
  re_evaluate_policies = true
}
# Binds the set-public-guest-user policy to the stage binding above it in the
# flow, at order 0.
resource "authentik_policy_binding" "set_guest_before_login" {
  policy = authentik_policy_expression.set_guest_user.id
  target = authentik_flow_stage_binding.public_login.id
  order  = 0
}
# -----------------------------------------------------------------------------
# Public proxy provider — forward_domain so it claims any host on
# viktorbarzin.me. Used only on the dedicated `public` outpost (where it is
# the sole bound provider), so there's no dispatch ambiguity with the
# catchall (which lives on the embedded outpost).
# -----------------------------------------------------------------------------
resource "authentik_provider_proxy" "public" {
  name          = "Provider for Public"
  mode          = "forward_domain"
  external_host = "https://public-auth.viktorbarzin.me"
  cookie_domain = "viktorbarzin.me"

  # A request arriving with NO Authentik session runs this flow first, which
  # auto-binds it to the `guest` user (no UI prompt).
  authentication_flow = authentik_flow.public_auto_login.uuid

  # Once authenticated (or already authenticated), implicit consent auto-approves.
  authorization_flow = data.authentik_flow.default_authorization_implicit_consent.id
  invalidation_flow  = data.authentik_flow.default_provider_invalidation.id

  access_token_validity = "weeks=4"

  lifecycle {
    ignore_changes = [
      property_mappings,
      jwt_federation_sources,
      skip_path_regex,
      internal_host,
      basic_auth_enabled,
      basic_auth_password_attribute,
      basic_auth_username_attribute,
      intercept_header_auth,
    ]
  }
}
# Application wrapping the public proxy provider.
resource "authentik_application" "public" {
  name              = "Public"
  slug              = "public"
  protocol_provider = authentik_provider_proxy.public.id

  # No policies are bound. policy_engine_mode = "any" + zero bindings =
  # everyone passes (the auto-login flow has already established `guest` as
  # the user).
  policy_engine_mode = "any"

  lifecycle {
    ignore_changes = [
      meta_description,
      meta_launch_url,
      meta_icon,
      group,
      backchannel_providers,
      open_in_new_tab,
    ]
  }
}
# Dedicated outpost so the public provider can claim viktorbarzin.me without
# colliding with the catchall (which already claims viktorbarzin.me on the
# embedded outpost). Authentik's K8s controller deploys this as
# `ak-outpost-public` (Deployment + Service in the `authentik` namespace).
resource "authentik_outpost" "public" {
  name               = "public"
  type               = "proxy"
  service_connection = "99e227a7-4562-4888-9660-4c27da678c50"
  protocol_providers = [authentik_provider_proxy.public.id]

  config = jsonencode({
    log_level                        = "info"
    docker_labels                    = null
    authentik_host                   = "https://authentik.viktorbarzin.me/"
    docker_network                   = null
    container_image                  = null
    docker_map_ports                 = true
    refresh_interval                 = "minutes=5"
    kubernetes_replicas              = 1
    kubernetes_namespace             = "authentik"
    authentik_host_browser           = ""
    object_naming_template           = "ak-outpost-%(name)s"
    authentik_host_insecure          = false
    kubernetes_service_type          = "ClusterIP"
    kubernetes_ingress_path_type     = null
    kubernetes_image_pull_secrets    = []
    kubernetes_ingress_class_name    = null
    kubernetes_disabled_components   = []
    kubernetes_ingress_annotations   = {}
    kubernetes_ingress_secret_name   = "authentik-outpost-tls"
    kubernetes_httproute_annotations = {}
    kubernetes_httproute_parent_refs = []

    # The only patch on the generated Deployment sets container resources.
    kubernetes_json_patches = {
      deployment = [
        {
          op    = "add"
          path  = "/spec/template/spec/containers/0/resources"
          value = { limits = { memory = "256Mi" }, requests = { cpu = "10m", memory = "64Mi" } }
        },
      ]
    }
  })
}
# Ingress for `public-auth.viktorbarzin.me` — exposes the public outpost's
# /outpost.goauthentik.io/* path so OAuth callbacks land on it. The
# `Provider for Public` external_host points here, so every redirect_uri in
# the OAuth flow resolves to this hostname.
module "ingress_public_outpost" {
  source     = "../../modules/kubernetes/ingress_factory"
  depends_on = [authentik_outpost.public]

  name            = "public-outpost"
  namespace       = "authentik"
  host            = "public-auth"
  service_name    = "ak-outpost-public"
  port            = 9000
  ingress_path    = ["/outpost.goauthentik.io"]
  tls_secret_name = var.tls_secret_name
  dns_type        = "proxied"

  # Public-tier outpost callback — the OAuth flow's redirect_uris all resolve
  # here; gating it with forward-auth would loop the public outpost onto itself.
  # auth = "none": Public outpost callback path for OAuth flow; protecting with forward-auth creates circular dependency.
  auth = "none"

  anti_ai_scraping = false
  exclude_crowdsec = true
  homepage_enabled = false
}

25
stacks/authentik/main.tf Normal file
View file

@ -0,0 +1,25 @@
# =============================================================================
# Authentik Stack — Identity provider (SSO)
# =============================================================================

# Both inputs are required strings and are forwarded to module.authentik.
variable "tls_secret_name" {
  type = string
}

variable "redis_host" {
  type = string
}
# Platform-wide secrets stored in Vault KV v2 at secret/platform.
data "vault_kv_secret_v2" "secrets" {
  mount = "secret"
  name  = "platform"
}

# The "homepage_credentials" entry is a JSON document; decode it once here.
locals {
  homepage_credentials = jsondecode(
    data.vault_kv_secret_v2.secrets.data["homepage_credentials"]
  )
}
# Instantiates the local authentik module with secrets taken from Vault.
module "authentik" {
  source = "./modules/authentik"

  tier            = local.tiers.cluster
  redis_host      = var.redis_host
  tls_secret_name = var.tls_secret_name

  secret_key        = data.vault_kv_secret_v2.secrets.data["authentik_secret_key"]
  postgres_password = data.vault_kv_secret_v2.secrets.data["authentik_postgres_password"]

  # Falls back to an empty token when the credentials document has no
  # authentik.token entry.
  homepage_token = try(local.homepage_credentials["authentik"]["token"], "")
}

View file

@ -0,0 +1,113 @@
# Name of the TLS Secret; passed to setup_tls_secret and the ingress modules.
variable "tls_secret_name" {
  type = string
}

# Authentik secret key, rendered into the Helm values file. Marked sensitive
# so it is redacted from plan/apply output.
variable "secret_key" {
  type      = string
  sensitive = true
}

# PostgreSQL password for Authentik, rendered into the Helm values file.
# Marked sensitive so it is redacted from plan/apply output.
variable "postgres_password" {
  type      = string
  sensitive = true
}

# Value of the `tier` label applied to the namespace.
variable "tier" {
  type = string
}

variable "redis_host" {
  type = string
}

# API token placed in the Homepage widget annotation on the ingress; optional.
variable "homepage_token" {
  type      = string
  default   = ""
  sensitive = true
}
# TLS Secret for the main Authentik ingress.
module "tls_secret" {
  source = "../../../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.authentik.metadata[0].name
}

# The embedded outpost auto-creates an ingress that expects a Secret with
# exactly this name.
module "tls_secret_outpost" {
  source = "../../../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = "authentik-outpost-tls"
  namespace       = kubernetes_namespace.authentik.metadata[0].name
}
# Namespace for everything Authentik-related.
resource "kubernetes_namespace" "authentik" {
  metadata {
    name = "authentik"

    labels = {
      "keel.sh/enrolled"                 = "true"
      "resource-governance/custom-quota" = "true"
      tier                               = var.tier
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
    ignore_changes = [metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"]]
  }
}
# Hard resource ceiling for the authentik namespace.
resource "kubernetes_resource_quota" "authentik" {
  metadata {
    name      = "authentik-quota"
    namespace = kubernetes_namespace.authentik.metadata[0].name
  }

  spec {
    hard = {
      pods              = "50"
      "requests.cpu"    = "16"
      "requests.memory" = "16Gi"
      "limits.memory"   = "96Gi"
    }
  }
}
# Authentik itself, installed from the upstream Helm chart.
resource "helm_release" "authentik" {
  name       = "goauthentik"
  repository = "https://charts.goauthentik.io/"
  chart      = "authentik"

  # Earlier pins, kept for reference: 2025.10.3, 2025.12.4
  version = "2026.2.2"

  namespace = kubernetes_namespace.authentik.metadata[0].name
  # NOTE(review): the namespace is already managed by
  # kubernetes_namespace.authentik, so this flag looks redundant.
  create_namespace = true

  atomic  = true
  timeout = 6000

  # values.yaml is a template; the two secrets are substituted at plan time.
  values = [
    templatefile("${path.module}/values.yaml", {
      postgres_password = var.postgres_password
      secret_key        = var.secret_key
    })
  ]
}
# Ingress for the Authentik UI/API (Service goauthentik-server).
module "ingress" {
  source = "../../../../modules/kubernetes/ingress_factory"

  name            = "authentik"
  namespace       = kubernetes_namespace.authentik.metadata[0].name
  service_name    = "goauthentik-server"
  tls_secret_name = var.tls_secret_name
  dns_type        = "proxied"

  # Authentik's own UI cannot be gated by Authentik forward-auth — that
  # creates a chicken-and-egg loop (users can't reach the login page).
  # auth = "none": Authentik UI cannot be gated by Authentik forward-auth (chicken-and-egg loop prevents login).
  auth = "none"

  anti_ai_scraping = false

  # Homepage dashboard tile plus its Authentik widget.
  extra_annotations = {
    "gethomepage.dev/enabled"      = "true"
    "gethomepage.dev/name"         = "Authentik"
    "gethomepage.dev/description"  = "Identity provider"
    "gethomepage.dev/icon"         = "authentik.png"
    "gethomepage.dev/group"        = "Identity & Security"
    "gethomepage.dev/pod-selector" = ""
    "gethomepage.dev/widget.type"  = "authentik"
    "gethomepage.dev/widget.url"   = "http://goauthentik-server.authentik.svc.cluster.local"
    "gethomepage.dev/widget.key"   = var.homepage_token
  }
}
# Routes /outpost.goauthentik.io on the authentik host to the embedded
# outpost Service (port 9000).
module "ingress-outpost" {
  source = "../../../../modules/kubernetes/ingress_factory"

  name            = "authentik-outpost"
  namespace       = kubernetes_namespace.authentik.metadata[0].name
  host            = "authentik"
  service_name    = "ak-outpost-authentik-embedded-outpost"
  port            = 9000
  ingress_path    = ["/outpost.goauthentik.io"]
  tls_secret_name = var.tls_secret_name

  # Authentik forward-auth outpost callback path — protecting this with
  # forward-auth would loop the outpost back onto itself.
  # auth = "none": Authentik outpost callback path for forward-auth flow; protecting with forward-auth creates circular dependency.
  auth = "none"

  anti_ai_scraping = false
  exclude_crowdsec = true
}

View file

@ -0,0 +1,14 @@
; pgbouncer configuration template. Terraform renders this file with
; templatefile(), substituting the password placeholder in the database entry.
[databases]
authentik = host=postgresql.dbaas port=5432 dbname=authentik user=authentik password=${password}
[pgbouncer]
; Accept client connections on every interface, port 6432.
listen_addr = 0.0.0.0
listen_port = 6432
; Clients authenticate against the entries in userlist.txt.
auth_type = md5
auth_file = /etc/pgbouncer/userlist.txt
; Pooling mode and connection limits.
pool_mode = session
max_client_conn = 200
default_pool_size = 20
reserve_pool_size = 5
reserve_pool_timeout = 5
ignore_startup_parameters = extra_float_digits

View file

@ -0,0 +1,207 @@
# ConfigMap holding the rendered pgbouncer.ini.
# NOTE(review): the rendered file embeds the database password, so the
# credential ends up in a ConfigMap rather than a Secret.
resource "kubernetes_config_map" "pgbouncer_config" {
  metadata {
    name = "pgbouncer-config"
    # Reference the namespace resource instead of the literal "authentik" so
    # Terraform creates the namespace before this object on a fresh apply.
    namespace = kubernetes_namespace.authentik.metadata[0].name
  }

  data = {
    "pgbouncer.ini" = templatefile("${path.module}/pgbouncer.ini", { password = var.postgres_password })
  }
}
# --- 2 Secret for user credentials ---
# Holds the rendered userlist.txt that pgbouncer uses as its auth_file.
resource "kubernetes_secret" "pgbouncer_auth" {
  metadata {
    name = "pgbouncer-auth"
    # Reference the namespace resource instead of the literal "authentik" so
    # Terraform creates the namespace before this object on a fresh apply.
    namespace = kubernetes_namespace.authentik.metadata[0].name
  }

  type = "Opaque"

  data = {
    "userlist.txt" = templatefile("${path.module}/userlist.txt", { password = var.postgres_password })
  }
}
# --- 3 Deployment ---
# Three PgBouncer replicas in the authentik namespace. pgbouncer.ini comes from
# the ConfigMap and userlist.txt from the Secret; each is mounted as a single
# file via sub_path.
resource "kubernetes_deployment" "pgbouncer" {
  metadata {
    name      = "pgbouncer"
    namespace = "authentik"
    labels = {
      app  = "pgbouncer"
      tier = var.tier
    }
  }

  spec {
    replicas = 3

    selector {
      match_labels = {
        app = "pgbouncer"
      }
    }

    template {
      metadata {
        labels = {
          app = "pgbouncer"
        }
      }

      spec {
        # NOTE(review): this rule keeps pgbouncer off nodes running pods
        # labelled `component=server`; it does not select `app=pgbouncer`, so
        # it does not by itself spread the three replicas across nodes.
        # Confirm that is the intent.
        affinity {
          pod_anti_affinity {
            required_during_scheduling_ignored_during_execution {
              label_selector {
                match_expressions {
                  key      = "component"
                  operator = "In"
                  values   = ["server"]
                }
              }
              topology_key = "kubernetes.io/hostname"
            }
          }
        }

        container {
          name  = "pgbouncer"
          image = "edoburu/pgbouncer:latest"
          # `:latest` tag — keep `Always` so pod restarts pick up upstream
          # updates. The previous `IfNotPresent` value was declared at module
          # creation but the live cluster has reconciled to `Always` (likely
          # via a Helm/operator default). Match reality to drop the drift.
          image_pull_policy = "Always"

          port {
            container_port = 6432
          }

          resources {
            requests = {
              cpu    = "50m"
              memory = "128Mi"
            }
            limits = {
              memory = "512Mi"
            }
          }

          readiness_probe {
            tcp_socket {
              port = 6432
            }
            initial_delay_seconds = 5
            period_seconds        = 10
            timeout_seconds       = 3
            failure_threshold     = 3
          }

          liveness_probe {
            tcp_socket {
              port = 6432
            }
            initial_delay_seconds = 30
            period_seconds        = 30
            timeout_seconds       = 5
            failure_threshold     = 3
          }

          volume_mount {
            name       = "config"
            mount_path = "/etc/pgbouncer/pgbouncer.ini"
            sub_path   = "pgbouncer.ini"
          }

          volume_mount {
            name       = "auth"
            mount_path = "/etc/pgbouncer/userlist.txt"
            sub_path   = "userlist.txt"
          }

          # The former DATABASES_AUTHENTIK env var was removed. It embedded
          # var.postgres_password in plain text in the pod spec and named
          # host=postgres, which disagrees with the mounted pgbouncer.ini
          # (host=postgresql.dbaas). Per the edoburu/pgbouncer entrypoint the
          # image only generates a config from env when
          # /etc/pgbouncer/pgbouncer.ini is absent, so the variable had no
          # effect. Removing it changes the pod template and rolls the pods once.
        }

        volume {
          name = "config"
          config_map {
            name = kubernetes_config_map.pgbouncer_config.metadata[0].name
          }
        }

        volume {
          name = "auth"
          secret {
            secret_name = kubernetes_secret.pgbouncer_auth.metadata[0].name
          }
        }

        dns_config {
          option {
            name  = "ndots"
            value = "2"
          }
        }
      }
    }
  }

  depends_on = [kubernetes_secret.pgbouncer_auth]

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
    ignore_changes = [
      spec[0].template[0].spec[0].dns_config, # KYVERNO_LIFECYCLE_V1
      metadata[0].annotations["keel.sh/policy"],
      metadata[0].annotations["keel.sh/trigger"],
      metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2
      metadata[0].annotations["keel.sh/match-tag"],
      spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE Keel manages tag updates
      metadata[0].annotations["kubernetes.io/change-cause"],
      metadata[0].annotations["deployment.kubernetes.io/revision"],
      spec[0].template[0].metadata[0].annotations["keel.sh/update-time"], # KEEL_LIFECYCLE_V1
    ]
  }
}
# --- 3b PodDisruptionBudget ---
# Protects auth against simultaneous node drains. With 3 replicas and
# minAvailable=2, a single drain rolls cleanly; a simultaneous two-node
# outage is correctly blocked.
resource "kubernetes_pod_disruption_budget_v1" "pgbouncer" {
  metadata {
    namespace = "authentik"
    name      = "pgbouncer"
  }

  spec {
    # Same label the Deployment's pod template carries.
    selector {
      match_labels = { app = "pgbouncer" }
    }

    min_available = 2
  }
}
# --- 4 Service ---
# ClusterIP service exposing TCP 6432 on the pods labelled app=pgbouncer.
resource "kubernetes_service" "pgbouncer" {
  metadata {
    namespace = "authentik"
    name      = "pgbouncer"
  }

  spec {
    type     = "ClusterIP"
    selector = { app = "pgbouncer" }

    port {
      protocol    = "TCP"
      port        = 6432
      target_port = 6432
    }
  }
}

View file

@ -0,0 +1 @@
"authentik" "${password}"

View file

@ -0,0 +1,113 @@
# Helm values for the authentik chart. Nesting below follows the chart's
# top-level sections: authentik / server / global / worker / postgresql.
authentik:
  log_level: warning
  # log_level: trace
  secret_key: ""
  existingSecret:
    secretName: "goauthentik"
  # This sends anonymous usage-data, stack traces on errors and
  # performance data to authentik.error-reporting.a7k.io, and is fully opt-in
  error_reporting:
    enabled: false
  postgresql:
    # host: postgresql.dbaas
    host: pgbouncer.authentik
    port: 6432
    user: authentik
    password: ""
    # Persistent client-side connections (safe with PgBouncer session mode;
    # must be < pgbouncer server_idle_timeout=600s). Cuts Django connection
    # setup overhead off the ~70 sequential ORM ops per flow stage.
    conn_max_age: 60
    conn_health_checks: true
  cache:
    # Cache flow plans for 30m and policy evaluations for 15m. Authentik 2026.2
    # moved cache storage from Redis to Postgres, so a TTL hit is still a
    # SELECT — but a single indexed lookup beats re-evaluating PolicyBindings.
    timeout_flows: 1800
    timeout_policies: 900
  web:
    # Gunicorn: 3 workers × 4 threads per server pod (default 2×4).
    # Pairs with the server memory bump to 2Gi (each worker preloads Django ~500Mi).
    workers: 3
    threads: 4
  worker:
    # Celery-equivalent worker threads per pod (default 2, renamed from
    # AUTHENTIK_WORKER__CONCURRENCY in 2025.8).
    threads: 4

server:
  replicas: 3
  # Anonymous Django sessions (no completed login: bots, healthcheckers,
  # partial flows) expire in 2h. Default is days=1. Once login completes,
  # UserLoginStage.session_duration takes over via request.session.set_expiry.
  # Injected via server.env (not authentik.sessions.*) because we use
  # authentik.existingSecret.secretName, which makes the chart skip
  # rendering the AUTHENTIK_* secret — so the values block doesn't reach env.
  env:
    - name: AUTHENTIK_SESSIONS__UNAUTHENTICATED_AGE
      value: "hours=2"
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 0
      maxUnavailable: 1
  resources:
    requests:
      cpu: 100m
      memory: 1.5Gi
    limits:
      memory: 2Gi
  topologySpreadConstraints:
    - maxSkew: 1
      topologyKey: kubernetes.io/hostname
      whenUnsatisfiable: ScheduleAnyway
      labelSelector:
        matchLabels:
          app.kubernetes.io/component: server
  ingress:
    enabled: false
    # hosts:
    #   - authentik.viktorbarzin.me
  podAnnotations:
    # Annotation values must be strings, so the boolean is quoted.
    diun.enable: "true"
    # Dots are escaped so they match a literal "." in tags such as 2025.8.1
    # (single-quoted so YAML keeps the backslash as-is).
    diun.include_tags: '^202[0-9]\.[0-9]+.*$' # no need to annotate the worker as it uses the same image
  pdb:
    enabled: true
    minAvailable: 2

global:
  addPrometheusAnnotations: true

worker:
  # 2 replicas: workers handle background tasks (LDAP sync, email,
  # certificate renewal) — no user-facing traffic, so 2-of-3 isn't
  # needed for availability. Drop saves ~100m sustained CPU.
  replicas: 2
  # Same unauthenticated_age cap as server — both the server (Django session
  # middleware) and worker (cleanup tasks) need to see the value.
  env:
    - name: AUTHENTIK_SESSIONS__UNAUTHENTICATED_AGE
      value: "hours=2"
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 0
      maxUnavailable: 1
  resources:
    requests:
      cpu: 100m
      memory: 1.5Gi
    limits:
      memory: 2Gi
  topologySpreadConstraints:
    - maxSkew: 1
      topologyKey: kubernetes.io/hostname
      whenUnsatisfiable: ScheduleAnyway
      labelSelector:
        matchLabels:
          app.kubernetes.io/component: worker
  pdb:
    enabled: true
    maxUnavailable: 1

postgresql:
  enabled: false

1
stacks/authentik/secrets Symbolic link
View file

@ -0,0 +1 @@
../../secrets

View file

@ -0,0 +1,33 @@
# "T3 Users" group gates the devvm Claude Code Workstation (t3.viktorbarzin.me)
# at the Authentik edge (the branch in admin-services-restriction.tf). The group
# is created WITH its members atomically so enabling the gate can never lock
# everyone (incl. wizard) out.
#
# emo / ancamilea / wizard are NOT Terraform-managed authentik_user resources in
# this stack, so they're looked up by username — which in this Authentik instance
# IS the user's email (verified live 2026-06-08): vbarzin@gmail.com, etc.
#
# Membership is in HCL for now (matches the roster's 3 users). FUTURE: when the
# devvm provisioner reconciles T3 Users membership from roster.yaml via the
# Authentik API, drop the `users` arg here so TF owns only the group's existence.
# Read-only lookups of existing Authentik users, keyed by username.
data "authentik_user" "wizard" {
  username = "vbarzin@gmail.com"
}

data "authentik_user" "emo" {
  username = "emil.barzin@gmail.com"
}

data "authentik_user" "ancamilea" {
  username = "ancaelena98@gmail.com"
}
# Creates the "T3 Users" group with its three members in the same resource.
resource "authentik_group" "t3_users" {
  name = "T3 Users"

  # Member ids come from the data lookups of the existing users.
  users = [
    data.authentik_user.wizard.id,
    data.authentik_user.emo.id,
    data.authentik_user.ancamilea.id,
  ]
}

View file

@ -0,0 +1,8 @@
# Inherit the shared Terragrunt configuration found in a parent folder.
include "root" {
  path = find_in_parent_folders()
}

# Declares a dependency on the sibling "infra" stack; its outputs are not read.
dependency "infra" {
  skip_outputs = true
  config_path  = "../infra"
}

View file

@ -0,0 +1 @@
skip = true

View file

@ -0,0 +1,10 @@
# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa
data "vault_kv_secret_v2" "cf_platform" {
mount = "secret"
name = "platform"
}
provider "cloudflare" {
api_key = data.vault_kv_secret_v2.cf_platform.data["cloudflare_api_key"]
email = "vbarzin@gmail.com"
}

1081
stacks/beads-server/main.tf Normal file

File diff suppressed because it is too large Load diff

1
stacks/beads-server/secrets Symbolic link
View file

@ -0,0 +1 @@
../../secrets

View file

@ -0,0 +1,3 @@
# Inherit the shared Terragrunt configuration found in a parent folder.
include "root" {
  path = find_in_parent_folders()
}

View file

@ -0,0 +1,10 @@
# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa
locals {
tiers = {
core = "0-core"
cluster = "1-cluster"
gpu = "2-gpu"
edge = "3-edge"
aux = "4-aux"
}
}

89
stacks/blog/.terraform.lock.hcl generated Normal file
View file

@ -0,0 +1,89 @@
# This file is maintained automatically by "terraform init".
# Manual edits may be lost in future updates.
provider "registry.terraform.io/cloudflare/cloudflare" {
version = "4.52.7"
constraints = "~> 4.0"
hashes = [
"h1:pPItIWii5oymR+geZB219ROSPuSODPLTlM4S/u8xLvM=",
"zh:0c904ce31a4c6c4a5b3bf7ff1560e77c0cc7e2450c8553ded8e8c90398e1418b",
"zh:36183d310c36373fe4cb936b83c595c6fd3b0a94bc7827f28e5789ccbf59752e",
"zh:556a568a6f0235e8f41647de9e4d3a1e7b1d6502df8b19b54ec441f1c653ea10",
"zh:633ebbd5b0245e75e500ef9be4d9e62288f97e8da3baaa51323892a786d90285",
"zh:6acfe60cf52a65ba8f044f748548d2119e7f4fd7f8ebcb14698960d87c68f529",
"zh:890df766e9b839623b1f0437355032a3c006226a6c200cd911e15ee1a9014e9f",
"zh:904acc31ebb9d6ef68c792074b30532ee61bf515f19e0a3c75b46f126cca1f13",
"zh:a1d0a81246afc8750286d3f6fe7a8fbe6460dd2662407b28dbfbabb612e5fa9d",
"zh:a41a36fe253fc365fe2b7ffc749624688b2693b4634862fda161179ab100029f",
"zh:a7ef269e77ffa8715c8945a2c14322c7ff159ea44c15f62505f3cbb2cae3b32d",
"zh:b01aa3bed30610633b762df64332b26f8844a68c3960cebcb30f04918efc67fe",
"zh:b069cc2cd18cae10757df3ae030508eac8d55de7e49eda7a5e3e11f2f7fe6455",
"zh:b2d2c6313729ebb7465dceece374049e2d08bda34473901be9ff46a8836d42b2",
"zh:db0e114edaf4bc2f3d4769958807c83022bfbc619a00bdf4c4bd17faa4ab2d8b",
"zh:ecc0aa8b9044f664fd2aaf8fa992d976578f78478980555b4b8f6148e8d1a5fe",
]
}
provider "registry.terraform.io/gavinbunney/kubectl" {
version = "1.19.0"
constraints = "~> 1.14"
hashes = [
"h1:9QkxPjp0x5FZFfJbE+B7hBOoads9gmdfj9aYu5N4Sfc=",
]
}
provider "registry.terraform.io/goauthentik/authentik" {
version = "2024.12.1"
constraints = "~> 2024.10"
hashes = [
"h1:roBMd+gi+TGgikH/bMzEI8JfvJiMAQWt+8FmokCrQIs=",
]
}
provider "registry.terraform.io/hashicorp/helm" {
version = "3.1.1"
hashes = [
"h1:47CqNwkxctJtL/N/JuEj+8QMg8mRNI/NWeKO5/ydfZU=",
"h1:5b2ojWKT0noujHiweCds37ZreRFRQLNaErdJLusJN88=",
"zh:1a6d5ce931708aec29d1f3d9e360c2a0c35ba5a54d03eeaff0ce3ca597cd0275",
"zh:3411919ba2a5941801e677f0fea08bdd0ae22ba3c9ce3309f55554699e06524a",
"zh:81b36138b8f2320dc7f877b50f9e38f4bc614affe68de885d322629dd0d16a29",
"zh:95a2a0a497a6082ee06f95b38bd0f0d6924a65722892a856cfd914c0d117f104",
"zh:9d3e78c2d1bb46508b972210ad706dd8c8b106f8b206ecf096cd211c54f46990",
"zh:a79139abf687387a6efdbbb04289a0a8e7eaca2bd91cdc0ce68ea4f3286c2c34",
"zh:aaa8784be125fbd50c48d84d6e171d3fb6ef84a221dbc5165c067ce05faab4c8",
"zh:afecd301f469975c9d8f350cc482fe656e082b6ab0f677d1a816c3c615837cc1",
"zh:c54c22b18d48ff9053d899d178d9ffef7d9d19785d9bf310a07d648b7aac075b",
"zh:db2eefd55aea48e73384a555c72bac3f7d428e24147bedb64e1a039398e5b903",
"zh:ee61666a233533fd2be971091cecc01650561f1585783c381b6f6e8a390198a4",
"zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c",
]
}
provider "registry.terraform.io/hashicorp/kubernetes" {
version = "3.1.0"
hashes = [
"h1:oodIAuFMikXNmEtil5MQgP4dfSctUBYQiGJfjbsF3NY=",
]
}
provider "registry.terraform.io/hashicorp/vault" {
version = "4.8.0"
constraints = "~> 4.0"
hashes = [
"h1:GPfhH6dr1LY0foPBDYv9bEGifx7eSwYqFcEAOWOUxLk=",
"h1:aHqgWQhDBMeZO9iUKwJYMlh4q+xNMUlMIcjRbF4d02Y=",
"zh:269ab13433f67684012ae7e15876532b0312f5d0d2002a9cf9febb1279ce5ea6",
"zh:4babc95bf0c40eb85005db1dc2ca403c46be4a71dd3e409db3711a56f7a5ca0e",
"zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3",
"zh:86e27c1c625ecc24446a11eeffc3ac319b36c2b4e51251db8579256a0dbcf136",
"zh:a32f31da94824009e26b077374440b52098aecb93c92ff55dc3d31dd37c4ea25",
"zh:be0a18c6c0425518bab4fbffd82078b82036a88503b5d76064de551c9f646cbf",
"zh:be5a77fdfd36863ebeec79cd12b1d13322ffad6821d157a0b279789fa06b5937",
"zh:be8317d142a3caad74c7d936039ae27076a1b2b8312ef5208e2871a5f525977c",
"zh:c94a84895a3d9954b80e983eed4603330a5cdbbd8eef5b3c99278c2d1402ef3c",
"zh:de1fb712784dd8415f011ca5346a34f87fab6046c730557615247e511dbc7d98",
"zh:e3eafae7da550f86cae395d6660b2a0e93ec8d2b0e0e5ef982ec762e961fc952",
"zh:ff35fb1ab6add288f0f368981e56f780b50405accd1937131cba1137999c8d83",
]
}

186
stacks/blog/main.tf Normal file
View file

@ -0,0 +1,186 @@
# Name of the TLS secret passed to the tls_secret module and the ingresses.
# Marked sensitive, so Terraform redacts it in plan/apply output.
variable "tls_secret_name" {
  sensitive = true
  type      = string
}
# Namespace that hosts the blog workloads.
resource "kubernetes_namespace" "website" {
  metadata {
    name = "website"
    labels = {
      "istio-injection"  = "disabled"
      "keel.sh/enrolled" = "true"
      tier               = local.tiers.aux
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
    ignore_changes = [metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"]]
  }
}
# Sets up the TLS secret inside the website namespace.
module "tls_secret" {
  source = "../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.website.metadata[0].name
}
# Blog deployment: the site container plus an nginx Prometheus exporter
# sidecar. Container order matters because ignore_changes below addresses the
# containers by index.
resource "kubernetes_deployment" "blog" {
  metadata {
    name      = "blog"
    namespace = kubernetes_namespace.website.metadata[0].name
    labels = {
      run  = "blog"
      tier = local.tiers.aux
    }
  }

  spec {
    replicas = 1

    selector {
      match_labels = {
        run = "blog"
      }
    }

    template {
      metadata {
        labels = {
          run = "blog"
        }
      }

      spec {
        container {
          image = "viktorbarzin/blog:latest"
          name  = "blog"

          resources {
            limits = {
              memory = "64Mi"
            }
            requests = {
              cpu    = "10m"
              memory = "64Mi"
            }
          }

          port {
            container_port = 80
          }
        }

        container {
          image = "nginx/nginx-prometheus-exporter"
          name  = "nginx-exporter"
          # Double-dash flag: exporter 1.x parses flags with kingpin and
          # rejects the old single-dash `-nginx.scrape-uri`, while 0.x
          # (Go `flag`) accepts both spellings. The image is untagged and its
          # tag is ignored below, so the arguments must work on either line.
          args = ["--nginx.scrape-uri", "http://127.0.0.1:8080/nginx_status"]

          port {
            container_port = 9113
          }
        }
      }
    }
  }

  lifecycle {
    ignore_changes = [
      spec[0].template[0].spec[0].dns_config, # KYVERNO_LIFECYCLE_V1
      metadata[0].annotations["keel.sh/policy"],
      metadata[0].annotations["keel.sh/trigger"],
      metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2
      metadata[0].annotations["keel.sh/match-tag"],
      spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE Keel manages tag updates
      spec[0].template[0].spec[0].container[1].image,
      metadata[0].annotations["kubernetes.io/change-cause"],
      metadata[0].annotations["deployment.kubernetes.io/revision"],
      spec[0].template[0].metadata[0].annotations["keel.sh/update-time"], # KEEL_LIFECYCLE_V1
    ]
  }
}
# Service in front of the blog pods: port 80 for the site and 9113 for the
# exporter sidecar. The annotations advertise the metrics port and path.
resource "kubernetes_service" "blog" {
  metadata {
    name      = "blog"
    namespace = kubernetes_namespace.website.metadata[0].name

    labels = { "run" = "blog" }

    annotations = {
      "prometheus.io/scrape" = "true"
      "prometheus.io/path"   = "/metrics"
      "prometheus.io/port"   = "9113"
    }
  }

  spec {
    selector = { run = "blog" }

    port {
      name        = "http"
      port        = "80"
      target_port = "80"
    }

    port {
      name        = "prometheus"
      port        = "9113"
      target_port = "9113"
    }
  }
}
# Anubis reverse proxy in front of the blog. First-time visitors solve a
# tiny PoW (~250ms desktop), get a 30-day cookie, and pass through. Replaces
# the global ai-bot-block forwardAuth for this site.
module "anubis" {
  source = "../../modules/kubernetes/anubis_instance"

  name      = "blog"
  namespace = kubernetes_namespace.website.metadata[0].name

  # Upstream is the blog Service's in-cluster DNS name.
  target_url       = "http://${kubernetes_service.blog.metadata[0].name}.${kubernetes_namespace.website.metadata[0].name}.svc.cluster.local"
  shared_store_url = "redis://redis-master.redis.svc.cluster.local:6379/10"
}
# Public ingress for viktorbarzin.me. Traffic goes to the Anubis service, not
# to the blog Service directly.
module "ingress" {
  source = "../../modules/kubernetes/ingress_factory"

  name      = "blog"
  namespace = kubernetes_namespace.website.metadata[0].name
  full_host = "viktorbarzin.me"
  dns_type  = "proxied"

  service_name = module.anubis.service_name
  port         = module.anubis.service_port

  auth              = "none" # Anubis-fronted; PoW challenge gates bots, no Authentik
  anti_ai_scraping  = false  # Anubis is the gatekeeper now — drop the redundant ai-bot-block forwardAuth.
  extra_middlewares = ["traefik-x402@kubernetescrd"]
  tls_secret_name   = var.tls_secret_name

  extra_annotations = {
    "gethomepage.dev/enabled"      = "true"
    "gethomepage.dev/name"         = "Blog"
    "gethomepage.dev/description"  = "Personal blog"
    "gethomepage.dev/icon"         = "hugo.png"
    "gethomepage.dev/group"        = "Other"
    "gethomepage.dev/pod-selector" = ""
  }
}
# Carve-out for /net-diag.sh — a curl|bash diagnostic script for macOS.
# Anubis can't gate this path because non-JS clients (curl) can't solve PoW.
# Points at the bare blog nginx service, bypassing the Anubis proxy.
module "ingress_net_diag" {
  source = "../../modules/kubernetes/ingress_factory"

  name         = "blog-net-diag"
  namespace    = kubernetes_namespace.website.metadata[0].name
  full_host    = "viktorbarzin.me"
  ingress_path = ["/net-diag.sh"]
  dns_type     = "none" # DNS already owned by the main blog ingress.

  service_name = kubernetes_service.blog.metadata[0].name
  port         = "80"

  # auth = "none": public read-only static file (curl|bash diagnostic script). No login, no PoW.
  auth             = "none"
  anti_ai_scraping = false # Single static file; nothing for scrapers to mine.
  tls_secret_name  = var.tls_secret_name
}
# CI retrigger 2026-05-16T13:42:57+00:00 bulk enrollment apply (pipeline #689 killed)
# CI retrigger v2 2026-05-16T13:46:35+00:00
# CI retrigger v3 2026-05-16T14:06:39Z
# CI retrigger v4 2026-05-16T14:13:59Z
# CI retrigger v5 2026-05-16T23:10:38Z
# CI retrigger v6 2026-05-16T23:18:58Z

49
stacks/blog/providers.tf Normal file
View file

@ -0,0 +1,49 @@
# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa
terraform {
required_providers {
vault = {
source = "hashicorp/vault"
version = "~> 4.0"
}
cloudflare = {
source = "cloudflare/cloudflare"
version = "~> 4"
}
authentik = {
source = "goauthentik/authentik"
version = "~> 2024.10"
}
# kubectl (gavinbunney) workaround for hashicorp/kubernetes
# `kubernetes_manifest` panics on Kyverno CRDs. See beads code-e2dp.
# Declared for all stacks but only used where opted-in.
kubectl = {
source = "gavinbunney/kubectl"
version = "~> 1.14"
}
}
}
variable "kube_config_path" {
type = string
default = "~/.kube/config"
}
provider "kubernetes" {
config_path = var.kube_config_path
}
provider "helm" {
kubernetes = {
config_path = var.kube_config_path
}
}
provider "vault" {
address = "https://vault.viktorbarzin.me"
skip_child_token = true
}
provider "kubectl" {
config_path = var.kube_config_path
load_config_file = true
}

1
stacks/blog/secrets Symbolic link
View file

@ -0,0 +1 @@
../../secrets

View file

@ -0,0 +1,8 @@
# Inherit the shared Terragrunt configuration found in a parent folder.
include "root" {
  path = find_in_parent_folders()
}

# Declares a dependency on the sibling "platform" stack; its outputs are not read.
dependency "platform" {
  skip_outputs = true
  config_path  = "../platform"
}

968
stacks/broker-sync/main.tf Normal file
View file

@ -0,0 +1,968 @@
# NFS server input (string, no default, so it must be supplied).
variable "nfs_server" {
  type = string
}
# Tag appended to the broker-sync image reference in the locals block.
variable "image_tag" {
  description = "broker-sync image tag. Use 8-char git SHA in CI; :latest only for local trials."
  type        = string
  default     = "latest"
}
# Namespace for every broker-sync resource in this stack.
resource "kubernetes_namespace" "broker_sync" {
  metadata {
    name = "broker-sync"
    labels = {
      tier               = local.tiers.aux
      "istio-injection"  = "disabled"
      "keel.sh/enrolled" = "true"
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
    ignore_changes = [metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"]]
  }
}
# Secrets for all providers. Seeded in Vault at `secret/broker-sync`:
#   wf_base_url          — e.g. https://wealthfolio.viktorbarzin.me
#   wf_username          — Wealthfolio login username
#   wf_password          — Wealthfolio login password (cleartext; server stores Argon2id)
#   trading212_api_keys  — JSON array of {account_id, account_type, api_key, name, currency}
#   ibkr_flex_token, ibkr_flex_query_id, ibkr_account_id_upstream
#                        — read by the broker-sync-ibkr CronJob
#   imap_host, imap_user, imap_password, imap_directory
#                        — for InvestEngine + Schwab email ingest
# `dataFrom.extract` copies every property of that Vault entry into the
# `broker-sync-secrets` Kubernetes Secret, refreshed every 15 minutes.
resource "kubernetes_manifest" "external_secret" {
  manifest = {
    apiVersion = "external-secrets.io/v1beta1"
    kind       = "ExternalSecret"
    metadata = {
      name      = "broker-sync-secrets"
      namespace = kubernetes_namespace.broker_sync.metadata[0].name
    }
    spec = {
      refreshInterval = "15m"
      secretStoreRef = {
        name = "vault-kv"
        kind = "ClusterSecretStore"
      }
      target = {
        name = "broker-sync-secrets"
      }
      dataFrom = [{
        extract = {
          key = "broker-sync"
        }
      }]
    }
  }

  depends_on = [kubernetes_namespace.broker_sync]
}
# Canonical data dir — SQLite watermarks, FX cache, CSV drop/archive, Wealthfolio session cache.
# Encrypted because we're storing brokerage tokens, session cookies, and transaction history.
resource "kubernetes_persistent_volume_claim" "data_encrypted" {
  wait_until_bound = false

  metadata {
    name      = "broker-sync-data-encrypted"
    namespace = kubernetes_namespace.broker_sync.metadata[0].name

    # Auto-resize settings read by the TopoLVM-style resizer annotations.
    annotations = {
      "resize.topolvm.io/threshold"     = "10%"
      "resize.topolvm.io/increase"      = "100%"
      "resize.topolvm.io/storage_limit" = "5Gi"
    }
  }

  spec {
    storage_class_name = "proxmox-lvm-encrypted"
    access_modes       = ["ReadWriteOnce"]

    resources {
      requests = {
        storage = "1Gi"
      }
    }
  }

  lifecycle {
    # The autoresizer expands requests.storage up to storage_limit and
    # PVCs can't shrink. Without this, every TF apply tries to revert
    # to the spec value, K8s rejects the shrink, and the PVC ends up
    # in Terminating-but-in-use limbo.
    ignore_changes = [spec[0].resources[0].requests]
  }
}
locals {
  # Image reference shared by every CronJob in this stack.
  broker_sync_image = "viktorbarzin/broker-sync:${var.image_tag}"

  # Shared env block for every CronJob: auth into Wealthfolio + data path.
  # Each entry sets either a literal `value` or `from`, the key to read out of
  # the broker-sync-secrets Secret; the other field is null. Order is kept
  # as-is because it determines the order of the rendered env vars.
  common_env = [
    {
      name  = "BROKER_SYNC_DATA_DIR"
      value = "/data"
      from  = null
    },
    {
      name  = "WF_SESSION_PATH"
      value = "/data/wealthfolio_session.json"
      from  = null
    },
    {
      name  = "WF_BASE_URL"
      value = null
      from  = "wf_base_url"
    },
    {
      name  = "WF_USERNAME"
      value = null
      from  = "wf_username"
    },
    {
      name  = "WF_PASSWORD"
      value = null
      from  = "wf_password"
    },
  ]
}
# Phase 0 liveness probe: runs `broker-sync version` once a day, which proves
# the image can be pulled and scheduled in this namespace. The pod mounts no
# volume and reads no secret, so it does not exercise the PVC or ESO wiring.
# NOTE(review): this comment used to say "Suspended by default; toggle to
# false to run", but no `suspend` attribute is set, so the job runs on its
# schedule. Add `suspend = true` to the spec if suspension was the intent.
resource "kubernetes_cron_job_v1" "version_probe" {
  metadata {
    name      = "broker-sync-version"
    namespace = kubernetes_namespace.broker_sync.metadata[0].name
    labels    = { app = "broker-sync", component = "version-probe" }
  }

  spec {
    schedule                      = "0 1 * * *"
    concurrency_policy            = "Forbid"
    successful_jobs_history_limit = 1
    failed_jobs_history_limit     = 3

    job_template {
      metadata {}

      spec {
        backoff_limit              = 1
        ttl_seconds_after_finished = 86400

        template {
          metadata {
            labels = { app = "broker-sync", component = "version-probe" }
          }

          spec {
            restart_policy = "OnFailure"

            container {
              name    = "broker-sync"
              image   = local.broker_sync_image
              command = ["broker-sync", "version"]

              resources {
                requests = { cpu = "10m", memory = "32Mi" }
                limits   = { memory = "128Mi" }
              }
            }
          }
        }
      }
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
    ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
  }
}
# Trading212 steady-state daily sync. Phase 1 deliverable.
resource "kubernetes_cron_job_v1" "trading212" {
  metadata {
    name      = "broker-sync-trading212"
    namespace = kubernetes_namespace.broker_sync.metadata[0].name
    labels    = { app = "broker-sync", component = "trading212" }
  }

  spec {
    # 02:00 daily. No `timezone` is set, so this is evaluated in the CronJob
    # controller's timezone (normally UTC), which matches UK local time only
    # outside BST. NOTE(review): set `timezone` if UK wall-clock was intended.
    schedule                      = "0 2 * * *"
    concurrency_policy            = "Forbid"
    starting_deadline_seconds     = 300
    successful_jobs_history_limit = 3
    failed_jobs_history_limit     = 5

    job_template {
      metadata {}

      spec {
        backoff_limit              = 2
        ttl_seconds_after_finished = 86400

        template {
          metadata {
            labels = { app = "broker-sync", component = "trading212" }
          }

          spec {
            restart_policy = "OnFailure"

            # See imap cron — without fsGroup=10001 the broker user (uid=10001
            # gid=999) can't write the sqlite3 journal next to /data/sync.db
            # and the dedup.record() call after a successful WF import crashes
            # with "attempt to write a readonly database".
            security_context {
              fs_group = 10001
            }

            container {
              name    = "broker-sync"
              image   = local.broker_sync_image
              command = ["broker-sync", "trading212", "--mode", "steady"]

              # Shared data-path + Wealthfolio credential env vars, rendered
              # from local.common_env in list order. Entries with `from` set
              # become secret_key_ref reads of broker-sync-secrets; the rest
              # are literal values.
              dynamic "env" {
                for_each = local.common_env
                content {
                  name  = env.value.name
                  value = env.value.value

                  dynamic "value_from" {
                    for_each = env.value.from == null ? [] : [env.value.from]
                    content {
                      secret_key_ref {
                        name = "broker-sync-secrets"
                        key  = value_from.value
                      }
                    }
                  }
                }
              }

              env {
                name = "T212_API_KEYS_JSON"
                value_from {
                  secret_key_ref {
                    name = "broker-sync-secrets"
                    key  = "trading212_api_keys"
                  }
                }
              }

              volume_mount {
                name       = "data"
                mount_path = "/data"
              }

              resources {
                requests = { cpu = "20m", memory = "128Mi" }
                limits   = { memory = "256Mi" }
              }
            }

            volume {
              name = "data"
              persistent_volume_claim {
                claim_name = kubernetes_persistent_volume_claim.data_encrypted.metadata[0].name
              }
            }
          }
        }
      }
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
    ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
  }
}
# IBKR Flex Web Service daily sync. Phase 2c deliverable.
# Pulls the Activity Flex Query (Trades + Cash + OpenPositions), maps to
# broker-sync Activities, runs them through the shared pipeline, then
# reconciles broker-reported OpenPositions against WF-computed quantities.
resource "kubernetes_cron_job_v1" "ibkr" {
  metadata {
    name      = "broker-sync-ibkr"
    namespace = kubernetes_namespace.broker_sync.metadata[0].name
    labels    = { app = "broker-sync", component = "ibkr" }
  }

  spec {
    # 02:00 daily. No `timezone` is set, so this is evaluated in the CronJob
    # controller's timezone (normally UTC), which matches UK local time only
    # outside BST.
    # NOTE(review): this is the same slot as broker-sync-trading212, and both
    # pods mount the same ReadWriteOnce data PVC (and the sqlite DB on it).
    # `concurrency_policy` only applies within one CronJob — consider
    # staggering the two schedules.
    schedule                      = "0 2 * * *"
    concurrency_policy            = "Forbid"
    starting_deadline_seconds     = 300
    successful_jobs_history_limit = 3
    failed_jobs_history_limit     = 5

    job_template {
      metadata {}

      spec {
        backoff_limit              = 2
        ttl_seconds_after_finished = 86400

        template {
          metadata {
            labels = { app = "broker-sync", component = "ibkr" }
          }

          spec {
            restart_policy = "OnFailure"

            # Same fsGroup as the other jobs that write to /data.
            security_context {
              fs_group = 10001
            }

            container {
              name    = "broker-sync"
              image   = local.broker_sync_image
              command = ["broker-sync", "ibkr"]

              # Shared data-path + Wealthfolio credential env vars, rendered
              # from local.common_env in list order. Entries with `from` set
              # become secret_key_ref reads of broker-sync-secrets; the rest
              # are literal values.
              dynamic "env" {
                for_each = local.common_env
                content {
                  name  = env.value.name
                  value = env.value.value

                  dynamic "value_from" {
                    for_each = env.value.from == null ? [] : [env.value.from]
                    content {
                      secret_key_ref {
                        name = "broker-sync-secrets"
                        key  = value_from.value
                      }
                    }
                  }
                }
              }

              # IBKR-specific credentials, all read from broker-sync-secrets.
              env {
                name = "IBKR_FLEX_TOKEN"
                value_from {
                  secret_key_ref {
                    name = "broker-sync-secrets"
                    key  = "ibkr_flex_token"
                  }
                }
              }

              env {
                name = "IBKR_FLEX_QUERY_ID"
                value_from {
                  secret_key_ref {
                    name = "broker-sync-secrets"
                    key  = "ibkr_flex_query_id"
                  }
                }
              }

              env {
                name = "IBKR_ACCOUNT_ID_UPSTREAM"
                value_from {
                  secret_key_ref {
                    name = "broker-sync-secrets"
                    key  = "ibkr_account_id_upstream"
                  }
                }
              }

              volume_mount {
                name       = "data"
                mount_path = "/data"
              }

              resources {
                requests = { cpu = "20m", memory = "128Mi" }
                limits   = { memory = "256Mi" }
              }
            }

            volume {
              name = "data"
              persistent_volume_claim {
                claim_name = kubernetes_persistent_volume_claim.data_encrypted.metadata[0].name
              }
            }
          }
        }
      }
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
    ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
  }
}
# IMAP ingest — InvestEngine + Schwab email parsers, one combined pod.
# Phase 2 deliverable. Defined ahead of implementation so the rollout is
# one `tf apply` once the image supports the CLI subcommand.
resource "kubernetes_cron_job_v1" "imap" {
  metadata {
    name      = "broker-sync-imap"
    namespace = kubernetes_namespace.broker_sync.metadata[0].name
    labels    = { app = "broker-sync", component = "imap" }
  }

  spec {
    # 02:30 daily, 30min after T212. No `timezone` is set; the incident note
    # below records this job firing at 02:30 UTC.
    schedule                      = "30 2 * * *"
    concurrency_policy            = "Forbid"
    successful_jobs_history_limit = 3
    failed_jobs_history_limit     = 5

    # History, 2026-05-27: this job was suspended that morning. Despite
    # BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS=invest-engine being set on the
    # cronjob (commit a4dab03), 39 IMAP-source IE BUYs were re-inserted into
    # Wealthfolio at 2026-05-27T09:22:18 UTC — exactly the rows deleted the
    # day before during the £252k dedup. The scheduled run at 02:30 UTC had
    # logged `ie_skipped=53` (skip was working), so the 09:22 source was
    # something else; suspending removed one possible vector (manual reruns,
    # replay queues, future bugs where the exclude env doesn't bind) at the
    # cost of Schwab vest ingestion.
    # The job is active now (`suspend = false`).
    # NOTE(review): this comment previously still read "RESUSPENDED" while the
    # value was false. Presumably the job was re-enabled once the code-level
    # default-exclude described on the container below landed — confirm, and
    # flip to true if the job is in fact meant to stay suspended.
    # See also code-9ko8 (pre-existing reliability issues).
    suspend = false

    job_template {
      metadata {}

      spec {
        backoff_limit              = 2
        ttl_seconds_after_finished = 86400

        template {
          metadata {
            labels = { app = "broker-sync", component = "imap" }
          }

          spec {
            restart_policy = "OnFailure"

            # The broker image's user is uid=10001 gid=999, but the shared
            # data PVC's /data root was created with gid=10001 (legacy from
            # an earlier image build). Without fsGroup the pod can't write
            # to the directory — sqlite3 can't create the journal next to
            # sync.db, hits 'attempt to write a readonly database'.
            # fsGroup=10001 adds the matching gid to the pod's supplemental
            # groups so writes succeed.
            security_context {
              fs_group = 10001
            }

            container {
              name    = "broker-sync"
              image   = local.broker_sync_image
              command = ["broker-sync", "imap-ingest"]

              # 2026-05-27 (afternoon, post-incident): IE-via-IMAP is now
              # STRUCTURALLY OPT-IN at the code level — broker_sync.providers.imap
              # default-excludes `invest-engine`. The earlier "standardise on IMAP
              # for IE" comment was inverted after a sibling Claude session ran
              # broker-sync imap-ingest at 09:22 UTC without the EXCLUDE env and
              # re-imported the 39 IE BUYs/DEPOSITs the previous day's dedup had
              # removed. To re-enable IE-via-IMAP, add:
              #   env {
              #     name  = "BROKER_SYNC_IMAP_INCLUDE_PROVIDERS"
              #     value = "invest-engine"
              #   }
              # Until that env is set, only Schwab is parsed (the canonical use
              # of the IMAP path — Schwab has no public API).
              # See post-mortem in beads code-dc1b.

              # Shared data-path + Wealthfolio credential env vars, rendered
              # from local.common_env in list order. Entries with `from` set
              # become secret_key_ref reads of broker-sync-secrets; the rest
              # are literal values.
              dynamic "env" {
                for_each = local.common_env
                content {
                  name  = env.value.name
                  value = env.value.value

                  dynamic "value_from" {
                    for_each = env.value.from == null ? [] : [env.value.from]
                    content {
                      secret_key_ref {
                        name = "broker-sync-secrets"
                        key  = value_from.value
                      }
                    }
                  }
                }
              }

              # Mailbox connection settings, all read from broker-sync-secrets.
              env {
                name = "IMAP_HOST"
                value_from {
                  secret_key_ref {
                    name = "broker-sync-secrets"
                    key  = "imap_host"
                  }
                }
              }

              env {
                name = "IMAP_USER"
                value_from {
                  secret_key_ref {
                    name = "broker-sync-secrets"
                    key  = "imap_user"
                  }
                }
              }

              env {
                name = "IMAP_PASSWORD"
                value_from {
                  secret_key_ref {
                    name = "broker-sync-secrets"
                    key  = "imap_password"
                  }
                }
              }

              env {
                name = "IMAP_DIRECTORY"
                value_from {
                  secret_key_ref {
                    name = "broker-sync-secrets"
                    key  = "imap_directory"
                  }
                }
              }

              volume_mount {
                name       = "data"
                mount_path = "/data"
              }

              resources {
                requests = { cpu = "10m", memory = "64Mi" }
                limits   = { memory = "256Mi" }
              }
            }

            volume {
              name = "data"
              persistent_volume_claim {
                claim_name = kubernetes_persistent_volume_claim.data_encrypted.metadata[0].name
              }
            }
          }
        }
      }
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
    ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
  }
}
# CSV drop-folder processor — Scottish Widows, Fidelity quarterly, Freetrade, etc.
# Phase 3 deliverable. Suspended until CLI subcommand lands.
resource "kubernetes_cron_job_v1" "csv_drop" {
  metadata {
    name      = "broker-sync-csv"
    namespace = kubernetes_namespace.broker_sync.metadata[0].name
    labels    = { app = "broker-sync", component = "csv" }
  }

  spec {
    # 03:00 daily. No `timezone` is set, so this is evaluated in the CronJob
    # controller's timezone (normally UTC), which matches UK local time only
    # outside BST.
    schedule                      = "0 3 * * *"
    concurrency_policy            = "Forbid"
    successful_jobs_history_limit = 3
    failed_jobs_history_limit     = 5
    suspend                       = true

    job_template {
      metadata {}

      spec {
        backoff_limit              = 1
        ttl_seconds_after_finished = 86400

        template {
          metadata {
            labels = { app = "broker-sync", component = "csv" }
          }

          spec {
            restart_policy = "OnFailure"

            # Added for parity with the trading212 / ibkr / imap jobs: this
            # pod mounts the same /data PVC, and those jobs document that
            # without fsGroup=10001 the broker user cannot write the sqlite3
            # journal next to /data/sync.db ("attempt to write a readonly
            # database"). The job is suspended, so this has no live effect
            # until it is enabled.
            security_context {
              fs_group = 10001
            }

            container {
              name    = "broker-sync"
              image   = local.broker_sync_image
              command = ["broker-sync", "csv-drop"]

              # Shared data-path + Wealthfolio credential env vars, rendered
              # from local.common_env in list order. Entries with `from` set
              # become secret_key_ref reads of broker-sync-secrets; the rest
              # are literal values.
              dynamic "env" {
                for_each = local.common_env
                content {
                  name  = env.value.name
                  value = env.value.value

                  dynamic "value_from" {
                    for_each = env.value.from == null ? [] : [env.value.from]
                    content {
                      secret_key_ref {
                        name = "broker-sync-secrets"
                        key  = value_from.value
                      }
                    }
                  }
                }
              }

              volume_mount {
                name       = "data"
                mount_path = "/data"
              }

              resources {
                requests = { cpu = "10m", memory = "64Mi" }
                limits   = { memory = "128Mi" }
              }
            }

            volume {
              name = "data"
              persistent_volume_claim {
                claim_name = kubernetes_persistent_volume_claim.data_encrypted.metadata[0].name
              }
            }
          }
        }
      }
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
    ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
  }
}
# Monthly HMRC FX reconciliation — rewrites last-month activities with official
# HMRC rates once they publish. Phase 1 tail / Phase 2 deliverable.
# Currently suspended (`suspend = true`).
resource "kubernetes_cron_job_v1" "fx_reconcile" {
  metadata {
    name      = "broker-sync-fx-reconcile"
    namespace = kubernetes_namespace.broker_sync.metadata[0].name
    labels = {
      app       = "broker-sync"
      component = "fx-reconcile"
    }
  }

  spec {
    # NOTE(review): no `timezone` is set here, so the schedule is evaluated in
    # the kube-controller-manager's timezone — confirm that really is UK time.
    schedule = "5 5 7 * *" # 05:05 UK on the 7th
    suspend  = true

    # Never overlap runs; keep a short history for debugging.
    concurrency_policy            = "Forbid"
    successful_jobs_history_limit = 3
    failed_jobs_history_limit     = 5

    job_template {
      metadata {}

      spec {
        backoff_limit              = 1
        ttl_seconds_after_finished = 86400 # finished Jobs are garbage-collected after 24h

        template {
          metadata {
            labels = {
              app       = "broker-sync"
              component = "fx-reconcile"
            }
          }

          spec {
            restart_policy = "OnFailure"

            # Shared broker-sync state volume, mounted at /data in the container.
            volume {
              name = "data"
              persistent_volume_claim {
                claim_name = kubernetes_persistent_volume_claim.data_encrypted.metadata[0].name
              }
            }

            container {
              name    = "broker-sync"
              image   = local.broker_sync_image
              command = ["broker-sync", "fx-reconcile"]

              # Plain-value settings.
              env {
                name  = "BROKER_SYNC_DATA_DIR"
                value = "/data"
              }
              env {
                name  = "WF_SESSION_PATH"
                value = "/data/wealthfolio_session.json"
              }

              # WF_* connection settings are read from the broker-sync-secrets Secret.
              env {
                name = "WF_BASE_URL"
                value_from {
                  secret_key_ref {
                    name = "broker-sync-secrets"
                    key  = "wf_base_url"
                  }
                }
              }
              env {
                name = "WF_USERNAME"
                value_from {
                  secret_key_ref {
                    name = "broker-sync-secrets"
                    key  = "wf_username"
                  }
                }
              }
              env {
                name = "WF_PASSWORD"
                value_from {
                  secret_key_ref {
                    name = "broker-sync-secrets"
                    key  = "wf_password"
                  }
                }
              }

              volume_mount {
                name       = "data"
                mount_path = "/data"
              }

              resources {
                requests = {
                  cpu    = "10m"
                  memory = "64Mi"
                }
                limits = {
                  memory = "128Mi"
                }
              }
            }
          }
        }
      }
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
    ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
  }
}
# Backup: snapshot sync.db / fx.db / csv-archive into NFS daily, keep 30 days.
# Convention from infra/.claude/CLAUDE.md: every proxmox-lvm app needs a backup
# CronJob writing to /mnt/main/<app>-backup/ on the PVE host (served over NFS).
#
# Failure semantics: a source file that does not exist yet is skipped with a
# warning (best-effort, e.g. fx.db before the first FX run), but a copy that
# actually fails (NFS full, I/O error, permissions) aborts the script via
# `set -e` so the Job is marked failed instead of logging "Backup complete"
# over an empty or partial snapshot.
resource "kubernetes_cron_job_v1" "backup" {
  metadata {
    name      = "broker-sync-backup"
    namespace = kubernetes_namespace.broker_sync.metadata[0].name
    labels    = { app = "broker-sync", component = "backup" }
  }

  spec {
    # NOTE(review): no `timezone` is set here, so the schedule is evaluated in
    # the kube-controller-manager's timezone — confirm that really is UK time.
    schedule                      = "15 4 * * *" # 04:15 UK — after all syncs
    concurrency_policy            = "Forbid"
    successful_jobs_history_limit = 3
    failed_jobs_history_limit     = 5

    job_template {
      metadata {}

      spec {
        backoff_limit              = 1
        ttl_seconds_after_finished = 86400

        template {
          metadata {
            labels = { app = "broker-sync", component = "backup" }
          }

          spec {
            restart_policy = "OnFailure"

            container {
              name  = "backup"
              image = "alpine:3.20"

              # NOTE(review): the .db files are presumably SQLite; a plain `cp`
              # of a database that is being written to can capture an
              # inconsistent file. The 04:15 slot is meant to fall after all
              # syncs — confirm nothing writes to /data at that time.
              command = ["/bin/sh", "-c", <<-EOT
                set -eu
                TIMESTAMP=$(date +%Y-%m-%dT%H-%M-%S)
                BACKUP_DIR="/backup/$TIMESTAMP"
                mkdir -p "$BACKUP_DIR"
                COPIED=0
                # Missing files are skipped (best-effort); real copy errors are
                # NOT swallowed and fail the job through `set -e`.
                for db in sync.db fx.db; do
                  if [ -f "/data/$db" ]; then
                    cp -a "/data/$db" "$BACKUP_DIR/"
                    COPIED=$((COPIED + 1))
                  else
                    echo "WARN: /data/$db not found, skipping" >&2
                  fi
                done
                if [ -d /data/csv-archive ]; then
                  cp -a /data/csv-archive "$BACKUP_DIR/"
                  COPIED=$((COPIED + 1))
                fi
                # Retention: keep last 30 days.
                find /backup -mindepth 1 -maxdepth 1 -type d -mtime +30 -exec rm -rf {} +
                if [ "$COPIED" -eq 0 ]; then
                  echo "WARN: nothing found under /data to back up; $BACKUP_DIR is empty" >&2
                else
                  echo "Backup complete: $BACKUP_DIR"
                fi
              EOT
              ]

              # Source data is mounted read-only: the backup must never modify it.
              volume_mount {
                name       = "data"
                mount_path = "/data"
                read_only  = true
              }
              volume_mount {
                name       = "backup"
                mount_path = "/backup"
              }

              resources {
                requests = { cpu = "5m", memory = "16Mi" }
                limits   = { memory = "64Mi" }
              }
            }

            volume {
              name = "data"
              persistent_volume_claim {
                claim_name = kubernetes_persistent_volume_claim.data_encrypted.metadata[0].name
              }
            }
            volume {
              name = "backup"
              nfs {
                server = var.nfs_server
                path   = "/srv/nfs/broker-sync-backup"
              }
            }
          }
        }
      }
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
    ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
  }
}
# -----------------------------------------------------------------------------
# Fidelity UK PlanViewer — monthly pension contribution sync
#
# Architecture notes:
# - The CLI (`broker-sync fidelity-ingest`) loads storage_state.json, boots
#   headless Chromium, scrapes the transaction history + valuation JSON, and
#   posts DEPOSIT activities to Wealthfolio. See
#   broker-sync/docs/providers/fidelity-planviewer.md for the seed workflow.
# - Storage_state is staged to Vault (`secret/broker-sync`, key
#   `fidelity_storage_state`). ESO projects all broker-sync keys into the
#   shared `broker-sync-secrets` K8s Secret; an init container writes the
#   JSON blob to the PVC so the main container can load it.
# - Image needs Chromium baked in — add the `fidelity-capable: "true"` label
#   so the Dockerfile/CI treats this CronJob's pod spec as the Playwright
#   variant.
#   NOTE(review): no such label is set on this resource, and the earlier note
#   "keep `suspend = true` until the Playwright image ships" no longer matches
#   `suspend = false` below — confirm both and prune this bullet.
# - Schedule: 05:00 UK on the 20th of each month — well after Viktor's mid-
#   month payroll contribution has settled (finance history shows credits
#   landing 13th-18th).
#   NOTE(review): no `timezone` is set, so the schedule is evaluated in the
#   kube-controller-manager's timezone — confirm that really is UK time.
resource "kubernetes_cron_job_v1" "fidelity" {
  metadata {
    name      = "broker-sync-fidelity"
    namespace = kubernetes_namespace.broker_sync.metadata[0].name
    labels = {
      app       = "broker-sync"
      component = "fidelity"
    }
  }

  spec {
    schedule                      = "0 5 20 * *"
    concurrency_policy            = "Forbid"
    successful_jobs_history_limit = 3
    failed_jobs_history_limit     = 5

    # Unsuspended 2026-05-17 after the delta gains-offset emission landed
    # (broker-sync @98c4729). Manual trigger:
    #   kubectl -n broker-sync create job fid-now \
    #     --from=cronjob/broker-sync-fidelity
    # NB: storage_state expires every 30-90 days — see code-r9n for the
    # chrome-service-driven re-seed runbook.
    suspend = false

    job_template {
      metadata {}

      spec {
        backoff_limit              = 1
        ttl_seconds_after_finished = 86400

        template {
          metadata {
            labels = {
              app       = "broker-sync"
              component = "fidelity"
            }
          }

          spec {
            restart_policy = "OnFailure"

            # Only the storage_state key of the shared Secret is projected as a file.
            volume {
              name = "secrets"
              secret {
                secret_name = "broker-sync-secrets"
                items {
                  key  = "fidelity_storage_state"
                  path = "fidelity_storage_state"
                }
              }
            }
            volume {
              name = "data"
              persistent_volume_claim {
                claim_name = kubernetes_persistent_volume_claim.data_encrypted.metadata[0].name
              }
            }

            # Materialise the JSON storage_state from the projected Secret
            # onto the PVC where Playwright expects to read it. Init container
            # runs as root; the main broker-sync container runs as uid 10001,
            # so we chown+chmod 600 to grant read access to the broker user.
            init_container {
              name  = "stage-storage-state"
              image = "busybox:1.36"
              command = ["/bin/sh", "-c", <<-EOT
                set -eu
                mkdir -p /data
                cp /secrets/fidelity_storage_state /data/fidelity_storage_state.json
                chown 10001:10001 /data/fidelity_storage_state.json
                chmod 600 /data/fidelity_storage_state.json
              EOT
              ]

              volume_mount {
                name       = "secrets"
                mount_path = "/secrets"
                read_only  = true
              }
              volume_mount {
                name       = "data"
                mount_path = "/data"
              }

              resources {
                requests = {
                  cpu    = "5m"
                  memory = "8Mi"
                }
                limits = {
                  memory = "32Mi"
                }
              }
            }

            container {
              name    = "broker-sync"
              image   = local.broker_sync_image
              command = ["broker-sync", "fidelity-ingest"]

              # Plain-value settings; the storage_state path matches the file
              # written by the init container above.
              env {
                name  = "BROKER_SYNC_DATA_DIR"
                value = "/data"
              }
              env {
                name  = "WF_SESSION_PATH"
                value = "/data/wealthfolio_session.json"
              }
              env {
                name  = "FIDELITY_STORAGE_STATE_PATH"
                value = "/data/fidelity_storage_state.json"
              }

              # Values read from the broker-sync-secrets Secret.
              env {
                name = "FIDELITY_PLAN_ID"
                value_from {
                  secret_key_ref {
                    name = "broker-sync-secrets"
                    key  = "fidelity_plan_id"
                  }
                }
              }
              env {
                name = "WF_BASE_URL"
                value_from {
                  secret_key_ref {
                    name = "broker-sync-secrets"
                    key  = "wf_base_url"
                  }
                }
              }
              env {
                name = "WF_USERNAME"
                value_from {
                  secret_key_ref {
                    name = "broker-sync-secrets"
                    key  = "wf_username"
                  }
                }
              }
              env {
                name = "WF_PASSWORD"
                value_from {
                  secret_key_ref {
                    name = "broker-sync-secrets"
                    key  = "wf_password"
                  }
                }
              }

              volume_mount {
                name       = "data"
                mount_path = "/data"
              }

              resources {
                # Chromium is hungry — headless shell + page rendering sit
                # comfortably under 1Gi, with spikes up to 1.2Gi during
                # full-page screenshots.
                requests = {
                  cpu    = "50m"
                  memory = "512Mi"
                }
                limits = {
                  memory = "1280Mi"
                }
              }
            }
          }
        }
      }
    }
  }

  lifecycle {
    ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config] # KYVERNO_LIFECYCLE_V1
  }
}

View file

@ -0,0 +1,13 @@
# Inherit the shared root Terragrunt configuration.
include "root" {
  path = find_in_parent_folders()
}

# Ordering-only dependencies: outputs are not consumed (skip_outputs = true).
dependency "platform" {
  config_path  = "../platform"
  skip_outputs = true
}

dependency "vault" {
  config_path  = "../vault"
  skip_outputs = true
}

133
stacks/calico/main.tf Normal file
View file

@ -0,0 +1,133 @@
# Calico CNI
#
# Calico has underpinned this cluster's pod networking since 2024-07-30, installed
# as raw kubectl manifests (tigera-operator Deployment + CRDs + Installation CR).
# Bringing the full stack under Terraform is high-blast-radius — the operator and
# its Deployment must never flap during node pressure or during any apply, because
# new pod scheduling breaks within ~seconds of a CNI outage.
#
# This stack (created 2026-04-18, Wave 5b) adopts the three namespaces only:
# calico-system, calico-apiserver, tigera-operator. The `tigera-operator`
# Deployment, the 20+ CRDs it manages, and the `Installation` CR itself are
# intentionally *not* adopted yet — they require a low-traffic window and a
# careful ignore_changes set to cover operator-generated defaults on the
# Installation CR. Follow-up tracked in beads code-3ad.
#
# The namespaces are safe to adopt (no networking impact — they're just label
# containers) and give TF an audit trail entry for the labels/tier Kyverno
# cares about.
# Adopted namespace for the Calico data-plane components.
resource "kubernetes_namespace" "calico_system" {
  metadata {
    name = "calico-system"
    labels = {
      name = "calico-system"
      # calico-system namespace is managed by tigera-operator — auto-update is
      # incompatible (operator reverts DaemonSet image from its Installation CR).
      # "keel.sh/enrolled" = "true"
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode label on every namespace.
    # pod-security.kubernetes.io/* labels are applied by the tigera-operator
    # reconciler on calico-system + calico-apiserver for PSA 'privileged'.
    ignore_changes = [
      metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"],
      metadata[0].labels["pod-security.kubernetes.io/enforce"],
      metadata[0].labels["pod-security.kubernetes.io/enforce-version"],
    ]
  }
}
# Adopted namespace for the Calico API server.
resource "kubernetes_namespace" "calico_apiserver" {
  metadata {
    name = "calico-apiserver"
    labels = {
      name = "calico-apiserver"
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1 + PSA labels applied by tigera-operator (see calico_system).
    # These labels are written by in-cluster controllers, so Terraform must not
    # try to revert them.
    ignore_changes = [
      metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"],
      metadata[0].labels["pod-security.kubernetes.io/enforce"],
      metadata[0].labels["pod-security.kubernetes.io/enforce-version"],
    ]
  }
}
# Adopted namespace for the tigera-operator itself.
resource "kubernetes_namespace" "tigera_operator" {
  metadata {
    name = "tigera-operator"
    labels = {
      name = "tigera-operator"
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
    ignore_changes = [
      metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"],
    ]
  }
}
# Wave 1 W1.6 (beads code-8ywc): observation phase via Calico GlobalNetworkPolicy
# `action: Log`. This is the supported primitive on Calico OSS v3.26 — the
# Calico-Enterprise FelixConfiguration.flowLogsFileEnabled approach is NOT
# accepted by the OSS CRD (verified 2026-05-19: "strict decoding error").
#
# How it works:
#   - GNP selects pods by namespaceSelector
#   - egress rule action=Log writes an iptables NFLOG entry that lands in the
#     kernel log / journald with prefix "calico-packet:" on each node
#   - Alloy DaemonSet already ships node-journal to Loki (job=node-journal)
#   - LogQL query: {job="node-journal"} |= "calico-packet" surfaces egress flows
#   - After ~1 week of observation, build the empirical per-namespace egress
#     allowlist; then flip the same GNP to [Allow specific dests, Deny rest]
#
# Started with `recruiter-responder` as the pilot on 2026-05-19; expanded
# 2026-05-19 to all tier 3+4 namespaces (per locked plan — tier 3-edge has
# 17 ns, tier 4-aux has 65 ns, all use Calico's WorkloadEndpoint policy
# path). Tier 0/1/2 stay out of observation in wave 1 (cluster infra +
# GPU workloads, deferred per the plan).
#
# `apply_only = true` on the kubectl_manifest means renaming the TF resource
# does NOT destroy the old GNP via TF — we kubectl delete the legacy pilot
# GNP after this applies to clean it up. (Tracked manually.)
resource "kubectl_manifest" "wave1_egress_observe_tier34" {
  apply_only = true

  yaml_body = yamlencode({
    apiVersion = "projectcalico.org/v3"
    kind       = "GlobalNetworkPolicy"

    metadata = {
      name = "wave1-egress-observe-tier34"
      annotations = {
        "security.viktorbarzin.me/wave"    = "1"
        "security.viktorbarzin.me/purpose" = "observe-then-enforce egress for tier 3-edge + 4-aux"
      }
    }

    spec = {
      order = 2000
      types = ["Egress"]

      # Every pod (`all()`) inside namespaces labelled tier 3-edge or 4-aux.
      selector          = "all()"
      namespaceSelector = "tier in {\"3-edge\", \"4-aux\"}"

      # Rule order matters: log first, then allow.
      egress = [
        # Rule 1: log every egress packet (LOG target writes to kernel/journal,
        # alloy ships to Loki with job=node-journal,transport=kernel).
        # LogQL: {job="node-journal"} |~ "calico-packet"
        { action = "Log" },
        # Rule 2: allow everything (observation must NOT break workloads).
        { action = "Allow" },
      ]
    }
  })
}
# CI retrigger 2026-05-16T13:42:57+00:00 bulk enrollment apply (pipeline #689 killed)
# CI retrigger v2 2026-05-16T13:46:35+00:00
# CI retrigger v3 2026-05-16T14:06:39Z
# CI retrigger v4 2026-05-16T14:13:59Z
# CI retrigger v5 2026-05-16T23:10:38Z
# CI retrigger v6 2026-05-16T23:18:58Z

1
stacks/calico/secrets Symbolic link
View file

@ -0,0 +1 @@
../../secrets

View file

@ -0,0 +1,6 @@
# Inherit the shared root Terragrunt configuration.
include "root" {
  path = find_in_parent_folders()
}

# No platform dependency — Calico provides the cluster network the rest
# of the platform runs on. This stack must not introduce a dep cycle.

152
stacks/changedetection/.terraform.lock.hcl generated Normal file
View file

@ -0,0 +1,152 @@
# This file is maintained automatically by "terraform init".
# Manual edits may be lost in future updates.
provider "registry.terraform.io/cloudflare/cloudflare" {
version = "4.52.7"
constraints = "~> 4.0"
hashes = [
"h1:pPItIWii5oymR+geZB219ROSPuSODPLTlM4S/u8xLvM=",
"zh:0c904ce31a4c6c4a5b3bf7ff1560e77c0cc7e2450c8553ded8e8c90398e1418b",
"zh:36183d310c36373fe4cb936b83c595c6fd3b0a94bc7827f28e5789ccbf59752e",
"zh:556a568a6f0235e8f41647de9e4d3a1e7b1d6502df8b19b54ec441f1c653ea10",
"zh:633ebbd5b0245e75e500ef9be4d9e62288f97e8da3baaa51323892a786d90285",
"zh:6acfe60cf52a65ba8f044f748548d2119e7f4fd7f8ebcb14698960d87c68f529",
"zh:890df766e9b839623b1f0437355032a3c006226a6c200cd911e15ee1a9014e9f",
"zh:904acc31ebb9d6ef68c792074b30532ee61bf515f19e0a3c75b46f126cca1f13",
"zh:a1d0a81246afc8750286d3f6fe7a8fbe6460dd2662407b28dbfbabb612e5fa9d",
"zh:a41a36fe253fc365fe2b7ffc749624688b2693b4634862fda161179ab100029f",
"zh:a7ef269e77ffa8715c8945a2c14322c7ff159ea44c15f62505f3cbb2cae3b32d",
"zh:b01aa3bed30610633b762df64332b26f8844a68c3960cebcb30f04918efc67fe",
"zh:b069cc2cd18cae10757df3ae030508eac8d55de7e49eda7a5e3e11f2f7fe6455",
"zh:b2d2c6313729ebb7465dceece374049e2d08bda34473901be9ff46a8836d42b2",
"zh:db0e114edaf4bc2f3d4769958807c83022bfbc619a00bdf4c4bd17faa4ab2d8b",
"zh:ecc0aa8b9044f664fd2aaf8fa992d976578f78478980555b4b8f6148e8d1a5fe",
]
}
provider "registry.terraform.io/gavinbunney/kubectl" {
version = "1.19.0"
constraints = "~> 1.14"
hashes = [
"h1:9QkxPjp0x5FZFfJbE+B7hBOoads9gmdfj9aYu5N4Sfc=",
"zh:1dec8766336ac5b00b3d8f62e3fff6390f5f60699c9299920fc9861a76f00c71",
"zh:43f101b56b58d7fead6a511728b4e09f7c41dc2e3963f59cf1c146c4767c6cb7",
"zh:4c4fbaa44f60e722f25cc05ee11dfaec282893c5c0ffa27bc88c382dbfbaa35c",
"zh:51dd23238b7b677b8a1abbfcc7deec53ffa5ec79e58e3b54d6be334d3d01bc0e",
"zh:5afc2ebc75b9d708730dbabdc8f94dd559d7f2fc5a31c5101358bd8d016916ba",
"zh:6be6e72d4663776390a82a37e34f7359f726d0120df622f4a2b46619338a168e",
"zh:72642d5fcf1e3febb6e5d4ae7b592bb9ff3cb220af041dbda893588e4bf30c0c",
"zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425",
"zh:a1da03e3239867b35812ee031a1060fed6e8d8e458e2eaca48b5dd51b35f56f7",
"zh:b98b6a6728fe277fcd133bdfa7237bd733eae233f09653523f14460f608f8ba2",
"zh:bb8b071d0437f4767695c6158a3cb70df9f52e377c67019971d888b99147511f",
"zh:dc89ce4b63bfef708ec29c17e85ad0232a1794336dc54dd88c3ba0b77e764f71",
"zh:dd7dd18f1f8218c6cd19592288fde32dccc743cde05b9feeb2883f37c2ff4b4e",
"zh:ec4bd5ab3872dedb39fe528319b4bba609306e12ee90971495f109e142d66310",
"zh:f610ead42f724c82f5463e0e71fa735a11ffb6101880665d93f48b4a67b9ad82",
]
}
provider "registry.terraform.io/goauthentik/authentik" {
version = "2024.12.1"
constraints = "~> 2024.10"
hashes = [
"h1:roBMd+gi+TGgikH/bMzEI8JfvJiMAQWt+8FmokCrQIs=",
"zh:090260dc7889ea822ec1d899344e1ee23eba5290461989c0796149c9511f2316",
"zh:13c2655ff824b0dc4b9bb832b5ca6d41dba97cb280330258c5fef4115e236209",
"zh:166a73c3a810c9c895d68a8ff968158f339f8a2c1c03e20ec9fc5ed99cc64e20",
"zh:203777eae1cdc711233315499643180604cff2324411b186b7cf07fdbe16f655",
"zh:3b2f18c9a8d28dac74dc6bbf168c946855ab9c68f053578d4630c50d5eaf30a0",
"zh:4822275985f6b74b6196c47112316a4252db22cf4ceaef7c9ab4c66d488abf2f",
"zh:53ea97562666c8a5a2f6d63d418a302a7f8ee4b7bb7da35dedaa89aa5708b7f0",
"zh:56b8a230901e3550c92a1d3f58ee9dafe9853f30fe4315af3ab28ae63262e15d",
"zh:6293ab7b1fd8206a0c853591f50186aca4a1eff117b2a773e10760a23a2c83e9",
"zh:9433970f79fb92d8aae3ee436db5630ab312c78b6dc9df9c1db3273a18f8aaa1",
"zh:95df406214f79b3b98222d7c7fe8fc319a3d90b7a9d53e1d5abbda5dfb8b9436",
"zh:a85880da0552a42c8f449390fbd7d8b03541d1a13e04bba9f1404fa658754260",
"zh:a95f6e9bd62c67e70eba1b1a14728856b9a6a28cd1e5e3be54a7718882c87e7f",
"zh:dd599b51c5beb34a4c6feece244fde07d2558d69929449ab1fd39a5ebe738781",
]
}
provider "registry.terraform.io/hashicorp/helm" {
version = "3.1.1"
hashes = [
"h1:47CqNwkxctJtL/N/JuEj+8QMg8mRNI/NWeKO5/ydfZU=",
"h1:5b2ojWKT0noujHiweCds37ZreRFRQLNaErdJLusJN88=",
"zh:1a6d5ce931708aec29d1f3d9e360c2a0c35ba5a54d03eeaff0ce3ca597cd0275",
"zh:3411919ba2a5941801e677f0fea08bdd0ae22ba3c9ce3309f55554699e06524a",
"zh:81b36138b8f2320dc7f877b50f9e38f4bc614affe68de885d322629dd0d16a29",
"zh:95a2a0a497a6082ee06f95b38bd0f0d6924a65722892a856cfd914c0d117f104",
"zh:9d3e78c2d1bb46508b972210ad706dd8c8b106f8b206ecf096cd211c54f46990",
"zh:a79139abf687387a6efdbbb04289a0a8e7eaca2bd91cdc0ce68ea4f3286c2c34",
"zh:aaa8784be125fbd50c48d84d6e171d3fb6ef84a221dbc5165c067ce05faab4c8",
"zh:afecd301f469975c9d8f350cc482fe656e082b6ab0f677d1a816c3c615837cc1",
"zh:c54c22b18d48ff9053d899d178d9ffef7d9d19785d9bf310a07d648b7aac075b",
"zh:db2eefd55aea48e73384a555c72bac3f7d428e24147bedb64e1a039398e5b903",
"zh:ee61666a233533fd2be971091cecc01650561f1585783c381b6f6e8a390198a4",
"zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c",
]
}
provider "registry.terraform.io/hashicorp/kubernetes" {
version = "3.1.0"
hashes = [
"h1:oodIAuFMikXNmEtil5MQgP4dfSctUBYQiGJfjbsF3NY=",
"zh:0215c5c60be62028c09a2f22458e89cda3ef5830a632299f1d401eb3538874b0",
"zh:09ebb9f442431e278a310a9423f32caf467cb4b3cad3fe59573ca71fa7b14e20",
"zh:0c4e5912f83bb35846ae0a9ae54fc320706ee61894cd21cc6b4181b1c5a2fa5c",
"zh:1678c982853ad461e65ccb5e79d585e13ed109dd47dab2a66d3a7a304faeef65",
"zh:1c050a5c15e330457a9c18caacf61a923c59d663e13f2962e4b32f04fef523a0",
"zh:2c55bcec83be58ec132c7cb0a1ac644758b800d794fdc636d53a0eada0358a3a",
"zh:a062bb0aa316c08d8460c66a5d68da71da40de5d3bc3b31abcf3a1a9a19650f1",
"zh:a26fdea0afaa9b247c73c0b42843ca51ba7db0ac2571f9d3d50dcabd20ca1b98",
"zh:c872c9385a78d502bf5823d61cd3bb0f9a0585030e025eb12585c83451beeaa1",
"zh:f180879af931182beee4c8c0d9dab62b81d86f17ddcbe3786ef4c7cec9163a4e",
"zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c",
"zh:f70f5789264069e0eef06f9b5d5fde955ef7206f7d446d1ce51a4c37a3f3e02f",
]
}
provider "registry.terraform.io/hashicorp/vault" {
version = "4.8.0"
constraints = "~> 4.0"
hashes = [
"h1:GPfhH6dr1LY0foPBDYv9bEGifx7eSwYqFcEAOWOUxLk=",
"h1:aHqgWQhDBMeZO9iUKwJYMlh4q+xNMUlMIcjRbF4d02Y=",
"zh:269ab13433f67684012ae7e15876532b0312f5d0d2002a9cf9febb1279ce5ea6",
"zh:4babc95bf0c40eb85005db1dc2ca403c46be4a71dd3e409db3711a56f7a5ca0e",
"zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3",
"zh:86e27c1c625ecc24446a11eeffc3ac319b36c2b4e51251db8579256a0dbcf136",
"zh:a32f31da94824009e26b077374440b52098aecb93c92ff55dc3d31dd37c4ea25",
"zh:be0a18c6c0425518bab4fbffd82078b82036a88503b5d76064de551c9f646cbf",
"zh:be5a77fdfd36863ebeec79cd12b1d13322ffad6821d157a0b279789fa06b5937",
"zh:be8317d142a3caad74c7d936039ae27076a1b2b8312ef5208e2871a5f525977c",
"zh:c94a84895a3d9954b80e983eed4603330a5cdbbd8eef5b3c99278c2d1402ef3c",
"zh:de1fb712784dd8415f011ca5346a34f87fab6046c730557615247e511dbc7d98",
"zh:e3eafae7da550f86cae395d6660b2a0e93ec8d2b0e0e5ef982ec762e961fc952",
"zh:ff35fb1ab6add288f0f368981e56f780b50405accd1937131cba1137999c8d83",
]
}
provider "registry.terraform.io/telmate/proxmox" {
version = "3.0.2-rc07"
constraints = "3.0.2-rc07"
hashes = [
"h1:zp5hpQJQ4t4zROSLqdltVpBO+Riy9VugtfFbpyTw1aM=",
"zh:2ee860cd0a368b3eaa53f4a9ea46f16dab8a97929e813ea6ef55183f8112c2ca",
"zh:415965fd915bae2040d7f79e45f64d6e3ae61149c10114efeac1b34687d7296c",
"zh:6584b2055df0e32062561c615e3b6b2c291ca8c959440adda09ef3ec1e1436bd",
"zh:65dcfad71928e0a8dd9befc22524ed686be5020b0024dc5cca5184c7420eeb6b",
"zh:7253dc29bd265d33f2791ac4f779c5413f16720bb717de8e6c5fcb2c858648ea",
"zh:7ec8993da10a47606670f9f67cfd10719a7580641d11c7aa761121c4a2bd66fb",
"zh:999a3f7a9dcf517967fc537e6ec930a8172203642fb01b8e1f78f908373db210",
"zh:a50e6df7280eb6584a5fd2456e3f5b6df13b2ec8a7fa4605511e438e1863be42",
"zh:b25b329a1e42681c509d027fee0365414f0cc5062b65690cfc3386aab16132ae",
"zh:c028877fdb438ece48f7bc02b65bbae9ca7b7befbd260e519ccab6c0cbb39f26",
"zh:cf0eaa3ea9fcc6d62793637947f1b8d7c885b6ad74695ab47e134e4ff132190f",
"zh:d5ade3fae031cc629b7c512a7b60e46570f4c41665e88a595d7efd943dde5ab2",
"zh:f388c15ad1ecfc09e7361e3b98bae9b627a3a85f7b908c9f40650969c949901c",
"zh:f415cc6f735a3971faae6ac24034afdb9ee83373ef8de19a9631c187d5adc7db",
]
}

View file

@ -0,0 +1,234 @@
# Name of the TLS secret; passed to the tls_secret and ingress modules.
variable "tls_secret_name" {
  sensitive = true
  type      = string
}

# NFS server address; passed to the nfs_volume module for the datastore.
variable "nfs_server" {
  type = string
}
# Namespace for the changedetection stack.
resource "kubernetes_namespace" "changedetection" {
  metadata {
    name = "changedetection"
    labels = {
      tier               = local.tiers.aux
      "istio-injection"  = "disabled"
      "keel.sh/enrolled" = "true"
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
    ignore_changes = [
      metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"],
    ]
  }
}
# ExternalSecret that extracts every key under `changedetection` from the
# `vault-kv` ClusterSecretStore into the `changedetection-secrets` K8s Secret,
# refreshed every 15 minutes.
resource "kubernetes_manifest" "external_secret" {
  # The namespace is referenced by literal name in the manifest, so the
  # ordering has to be declared explicitly.
  depends_on = [kubernetes_namespace.changedetection]

  manifest = {
    apiVersion = "external-secrets.io/v1beta1"
    kind       = "ExternalSecret"

    metadata = {
      name      = "changedetection-secrets"
      namespace = "changedetection"
    }

    spec = {
      refreshInterval = "15m"

      secretStoreRef = {
        kind = "ClusterSecretStore"
        name = "vault-kv"
      }

      target = {
        name = "changedetection-secrets"
      }

      dataFrom = [
        {
          extract = {
            key = "changedetection"
          }
        },
      ]
    }
  }
}
# Reads back the `changedetection-secrets` Secret (the ExternalSecret's target)
# so its values can be used elsewhere in this stack.
data "kubernetes_secret" "eso_secrets" {
  depends_on = [kubernetes_manifest.external_secret]

  metadata {
    name      = "changedetection-secrets"
    namespace = kubernetes_namespace.changedetection.metadata[0].name
  }
}
locals {
  # The `homepage_credentials` key holds a JSON document; decode it once here.
  homepage_credentials = jsondecode(
    data.kubernetes_secret.eso_secrets.data["homepage_credentials"]
  )
}
# Sets up the TLS secret in this stack's namespace via the shared module.
module "tls_secret" {
  source = "../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.changedetection.metadata[0].name
}
# Datastore on NFS. Migrated off proxmox-lvm 2026-06-05 for LUN-cap relief —
# changedetection uses a file-based JSON datastore (no embedded DB), NFS-safe.
# See docs/plans/2026-06-05-block-storage-harden-nfs-design.md
module "nfs_changedetection" {
  source = "../../modules/kubernetes/nfs_volume"

  name      = "changedetection-data-nfs"
  namespace = kubernetes_namespace.changedetection.metadata[0].name
  storage   = "8Gi"

  nfs_server = var.nfs_server
  nfs_path   = "/srv/nfs/changedetection"
}
# changedetection.io plus its sockpuppetbrowser sidecar in a single pod.
# The main container reaches the sidecar over ws://localhost:3000.
# Container order is significant: `ignore_changes` below addresses the images
# by index (container[0] = sockpuppetbrowser, container[1] = changedetection).
resource "kubernetes_deployment" "changedetection" {
  metadata {
    name      = "changedetection"
    namespace = kubernetes_namespace.changedetection.metadata[0].name
    labels = {
      app  = "changedetection"
      tier = local.tiers.aux
    }
  }

  spec {
    # NOTE(review): an earlier comment here read "Disabled: chronic OOM at 64Mi
    # limit, not worth the memory cost to increase", but replicas is 1 and the
    # limits below are 128Mi / 512Mi — that note looks stale; confirm.
    replicas = 1

    strategy {
      type = "Recreate"
    }

    selector {
      match_labels = {
        app = "changedetection"
      }
    }

    template {
      metadata {
        labels = {
          app = "changedetection"
        }
      }

      spec {
        # container[0]: browser sidecar exposing a WebSocket on :3000.
        container {
          name              = "sockpuppetbrowser"
          image             = "dgtlmoon/sockpuppetbrowser:latest"
          image_pull_policy = "IfNotPresent"

          port {
            name           = "ws"
            protocol       = "TCP"
            container_port = 3000
          }

          security_context {
            capabilities {
              add = ["SYS_ADMIN"]
            }
          }

          resources {
            requests = {
              cpu    = "25m"
              memory = "128Mi"
            }
            limits = {
              memory = "128Mi"
            }
          }
        }

        # container[1]: the changedetection.io web app on :5000.
        container {
          name  = "changedetection"
          image = "ghcr.io/dgtlmoon/changedetection.io:latest" # latest is latest stable

          env {
            name  = "PLAYWRIGHT_DRIVER_URL"
            value = "ws://localhost:3000"
          }
          env {
            name  = "BASE_URL"
            value = "https://changedetection.viktorbarzin.me"
          }
          env {
            name  = "LOGGER_LEVEL"
            value = "WARNING"
          }
          env {
            name  = "TZ"
            value = "Europe/Sofia"
          }

          port {
            name           = "http"
            protocol       = "TCP"
            container_port = 5000
          }

          volume_mount {
            name       = "data"
            mount_path = "/datastore"
          }

          resources {
            requests = {
              cpu    = "15m"
              memory = "256Mi"
            }
            limits = {
              memory = "512Mi"
            }
          }
        }

        # security_context {
        #   fs_group = "1500"
        # }

        # NFS-backed datastore claim provided by module.nfs_changedetection.
        volume {
          name = "data"
          persistent_volume_claim {
            claim_name = module.nfs_changedetection.claim_name
          }
        }
      }
    }
  }

  lifecycle {
    ignore_changes = [
      # Pod-spec mutations made by admission controllers.
      spec[0].template[0].spec[0].dns_config, # KYVERNO_LIFECYCLE_V1

      # Image tags of both containers.
      spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE — Keel manages tag updates
      spec[0].template[0].spec[0].container[1].image,

      # keel.sh annotations on the Deployment and the pod template.
      metadata[0].annotations["keel.sh/policy"],
      metadata[0].annotations["keel.sh/trigger"],
      metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2
      metadata[0].annotations["keel.sh/match-tag"],
      spec[0].template[0].metadata[0].annotations["keel.sh/update-time"], # KEEL_LIFECYCLE_V1

      # Rollout bookkeeping annotations.
      metadata[0].annotations["kubernetes.io/change-cause"],
      metadata[0].annotations["deployment.kubernetes.io/revision"],
    ]
  }
}
# Service for the changedetection pods: port 80 forwards to the app's
# container port 5000.
resource "kubernetes_service" "changedetection" {
  metadata {
    name      = "changedetection"
    namespace = kubernetes_namespace.changedetection.metadata[0].name
    labels = {
      app = "changedetection"
    }
  }

  spec {
    selector = {
      app = "changedetection"
    }

    port {
      target_port = 5000
      port        = 80
    }
  }
}
# Ingress for changedetection via the shared ingress_factory module, with
# gethomepage.dev annotations for the dashboard tile and widget.
module "ingress" {
  source = "../../modules/kubernetes/ingress_factory"

  name            = "changedetection"
  namespace       = kubernetes_namespace.changedetection.metadata[0].name
  tls_secret_name = var.tls_secret_name
  dns_type        = "proxied"
  auth            = "required"

  extra_annotations = {
    # Dashboard tile.
    "gethomepage.dev/enabled"      = "true"
    "gethomepage.dev/name"         = "Changedetection"
    "gethomepage.dev/description"  = "Website change monitor"
    "gethomepage.dev/icon"         = "changedetection.png"
    "gethomepage.dev/group"        = "Automation"
    "gethomepage.dev/pod-selector" = ""

    # Widget: points at the in-cluster Service; API key comes from the ESO secret.
    "gethomepage.dev/widget.type" = "changedetectionio"
    "gethomepage.dev/widget.url"  = "http://changedetection.changedetection.svc.cluster.local"
    "gethomepage.dev/widget.key"  = local.homepage_credentials["changedetection"]["api_key"]
  }
}

View file

@ -0,0 +1,53 @@
# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa
terraform {
required_providers {
vault = {
source = "hashicorp/vault"
version = "~> 4.0"
}
cloudflare = {
source = "cloudflare/cloudflare"
version = "~> 4"
}
authentik = {
source = "goauthentik/authentik"
version = "~> 2024.10"
}
# kubectl (gavinbunney) workaround for hashicorp/kubernetes
# `kubernetes_manifest` panics on Kyverno CRDs. See beads code-e2dp.
# Declared for all stacks but only used where opted-in.
kubectl = {
source = "gavinbunney/kubectl"
version = "~> 1.14"
}
proxmox = {
source = "telmate/proxmox"
version = "3.0.2-rc07"
}
}
}
variable "kube_config_path" {
type = string
default = "~/.kube/config"
}
provider "kubernetes" {
config_path = var.kube_config_path
}
provider "helm" {
kubernetes = {
config_path = var.kube_config_path
}
}
provider "vault" {
address = "https://vault.viktorbarzin.me"
skip_child_token = true
}
provider "kubectl" {
config_path = var.kube_config_path
load_config_file = true
}

View file

@ -0,0 +1 @@
../../secrets

View file

@ -0,0 +1,9 @@
# Inherit the shared root Terragrunt configuration.
include "root" {
  path = find_in_parent_folders()
}

# Ordering-only dependency: outputs are not consumed (skip_outputs = true).
dependency "platform" {
  config_path  = "../platform"
  skip_outputs = true
}

View file

@ -0,0 +1,119 @@
# chrome-service
In-cluster headed Chromium exposed over the Chrome DevTools Protocol
(CDP) on TCP :9222. Sibling services drive it instead of running their
own in-process browser — useful when the upstream tries to detect
headless mode (e.g. hmembeds' `disable-devtool.js` redirect-to-google
trap). Also publishes an hourly snapshot of cookies + localStorage so
external dev-box Claude Code sessions can warm their isolated
playwright contexts from the same logged-in profile.
## Connect (in-cluster callers)
```python
from playwright.async_api import async_playwright
CDP_URL = "http://chrome-service.chrome-service.svc.cluster.local:9222"
async with async_playwright() as p:
browser = await p.chromium.connect_over_cdp(CDP_URL, timeout=15_000)
# browser.contexts[0] is the persistent default context (the one
# the user logs into via noVNC). For bot work that should NOT share
# cookies, create a fresh incognito context:
context = await browser.new_context()
await context.add_init_script(STEALTH_JS)
page = await context.new_page()
...
await browser.close()
```
NetworkPolicy is the only gate on the CDP endpoint: it admits labelled
client namespaces plus an explicit fallback (`f1-stream`). No bearer token
is required for the connection itself.
## Snapshot endpoint (external callers)
```bash
# Bearer token comes from Vault secret/chrome-service.api_bearer_token.
TOKEN=$(vault kv get -field=api_bearer_token secret/chrome-service)
curl -fsSL \
-H "Authorization: Bearer $TOKEN" \
https://chrome.viktorbarzin.me/api/snapshot \
> storage-state.json
# Use the snapshot with @playwright/mcp:
npx @playwright/mcp@latest --port 8931 --host localhost \
--headless --browser chrome \
--isolated --storage-state ./storage-state.json
```
The snapshot is refreshed hourly by the `chrome-service-snapshot-harvester`
CronJob (schedule `23 * * * *`) which calls `context.storageState()` via
the CDP endpoint and writes to `/profile/snapshots/storage-state.json`
(atomic rename). The `snapshot-server` sidecar serves that file.
## Add a new in-cluster caller
1. **Label the caller's namespace** so the chrome-service NetworkPolicy
admits it:
```hcl
resource "kubernetes_namespace" "<ns>" {
metadata {
labels = {
"chrome-service.viktorbarzin.me/client" = "true"
}
}
}
```
2. **Inject `CHROME_CDP_URL`** into the caller's pod env:
```hcl
env {
name = "CHROME_CDP_URL"
value = "http://chrome-service.chrome-service.svc.cluster.local:9222"
}
```
3. **Vendor `stealth.js`** into the caller (or just paste — it's ~40
lines) and apply via `await context.add_init_script(STEALTH_JS)` after
every `new_context()`. Without it, hmembeds-class anti-bot still trips.
## Image pin
Both the server image (`mcr.microsoft.com/playwright:v1.48.0-noble` in
`main.tf`) and the client (`playwright==1.48.0` in callers' requirements)
must match minor-versions. Bump in lockstep — Playwright protocol changes
between minors.
## Operations
- **Storage**: encrypted PVC at `/profile`. Chromium user-data-dir lives
at `/profile/chromium-data` — cookies + localStorage + IndexedDB
persist here. Snapshots at `/profile/snapshots/storage-state.json`.
Backed up tar+gzip every 6h to `/srv/nfs/chrome-service-backup/`,
30-day retention.
- **Probes**: TCP/9222. Chrome's CDP serves `/json/version` once it's
bound; TCP-open is enough for readiness.
- **Health page**: visit `https://chrome.viktorbarzin.me` (Authentik-
gated) to confirm the pod is up and to log into sites. The CDP port
stays internal-only.
- **Token rotation**: `vault kv put secret/chrome-service api_bearer_token=$(python3 -c 'import secrets; print(secrets.token_urlsafe(32))')`.
Reloader cascades to the snapshot-server sidecar. Update the cached
token on any dev box that pulls the snapshot:
`vault kv get -field=api_bearer_token secret/chrome-service > ~/.config/playwright/token`.
## Why headed (Xvfb) instead of headless?
`disable-devtool.js` and similar libraries detect `navigator.webdriver`,
console-clear timing, and the `HeadlessChromium/...` user-agent suffix.
Running headed inside `Xvfb :99` reports as a normal Chromium, and the
stealth init script handles the JS-visible giveaways.
## Why direct chromium (CDP) instead of `playwright launch-server`?
`playwright launch-server` creates ephemeral browser contexts per
`connect()` call — cookies and localStorage never persist to the PVC.
The `/profile` mount only ever held npm cache + fontconfig cache
despite the original docs claiming it held "cookies, localStorage,
IndexedDB". Switched 2026-06-04 to direct chromium launch with
`--user-data-dir=/profile/chromium-data --remote-debugging-port=9222`
so the persistent profile actually persists, and callers migrate
`chromium.connect(ws_url)` → `chromium.connect_over_cdp(cdp_url)`.

View file

@ -0,0 +1,214 @@
#!/usr/bin/env python3
"""CDP-aware proxy: 0.0.0.0:9222 → 127.0.0.1:9223 with Host header rewriting.
Why this exists:
Stock Chrome binaries silently ignore --remote-debugging-address (the flag is
gated by a build-time switch most distributions don't set), so CDP always
binds 127.0.0.1:<port>. Worse, Chrome enforces DNS rebinding protection on
the HTTP DevTools endpoint: any Host header that isn't `localhost`,
`127.0.0.1`, or `[::1]` returns 500 "Host header is specified and is not an
IP address or localhost". There is no `--remote-allow-hosts` flag in stock
Chrome 130 (verified by binary string search).
This means a raw TCP forwarder doesn't work — clients hitting the K8s
Service DNS get 500 because Chrome rejects the Host header.
What this script does:
- Listens on 0.0.0.0:9222 (the public CDP port the K8s Service exposes).
- For each TCP connection from a CDP client:
1. Read the HTTP request line + headers.
2. Rewrite `Host: <whatever>` to `Host: localhost:9222`, remembering
the original value (for response rewriting).
3. Open a connection to Chrome at 127.0.0.1:9223 and forward the
modified request line + headers + body.
4. Read Chrome's HTTP response. If it's 101 Switching Protocols
(WebSocket upgrade), forward it as-is and switch to raw byte piping
in both directions (CDP frames are binary, no further parsing).
5. Otherwise it's a regular HTTP/JSON response. Substitute
`localhost:9222` (the URL Chrome composed from the rewritten Host)
back to the client's original Host header value. Forward.
- The Microsoft playwright image ships python3 but not socat, hence this
stdlib-only helper.
Limitations:
- Only HTTP/1.x supported (CDP doesn't use HTTP/2).
- Body is assumed to fit in one read for non-WS responses (CDP JSON
responses are kilobytes, well within limits).
- No SSL/TLS — the cluster network is the trust boundary.
"""
import os
import socket
import sys
import threading
# Address and port the bridge listens on; both overridable via environment.
LISTEN_ADDR = os.environ.get("BRIDGE_LISTEN_ADDR", "0.0.0.0")
LISTEN_PORT = int(os.environ.get("BRIDGE_LISTEN_PORT", "9222"))
# Upstream address and port that each accepted connection is forwarded to.
TARGET_ADDR = os.environ.get("BRIDGE_TARGET_ADDR", "127.0.0.1")
TARGET_PORT = int(os.environ.get("BRIDGE_TARGET_PORT", "9223"))
# Host header value substituted into forwarded requests; handle() also
# replaces this string in response bodies with the client's original Host.
INTERNAL_HOST = f"localhost:{LISTEN_PORT}"
def recv_until(sock: socket.socket, marker: bytes, max_bytes: int = 65536) -> bytes:
    """Accumulate data from ``sock`` until ``marker`` shows up.

    Reading stops as soon as the marker is present in the accumulated data,
    at least ``max_bytes`` have been collected, or the peer closes the
    connection. Everything received so far is returned, which may include
    bytes that follow the marker.
    """
    received = bytearray()
    while True:
        if marker in received or len(received) >= max_bytes:
            break
        piece = sock.recv(4096)
        if not piece:
            # Peer closed before the marker arrived.
            break
        received.extend(piece)
    return bytes(received)
def rewrite_host(headers: bytes, new_host: str) -> tuple[bytes, str | None]:
"""Replace the Host header. Returns (new_headers, original_host)."""
lines = headers.split(b"\r\n")
original = None
out = []
for line in lines:
if line.lower().startswith(b"host:"):
original = line.split(b":", 1)[1].strip().decode("latin-1")
out.append(f"Host: {new_host}".encode("latin-1"))
else:
out.append(line)
return b"\r\n".join(out), original
def pipe(src: socket.socket, dst: socket.socket) -> None:
    """Copy bytes from ``src`` to ``dst`` until EOF or a socket error.

    Used for both directions once a WebSocket upgrade has completed. When
    copying stops, the read half of ``src`` and the write half of ``dst`` are
    shut down; errors from either shutdown are ignored.
    """
    try:
        while data := src.recv(65536):
            dst.sendall(data)
    except OSError:
        pass
    finally:
        for sock, how in ((src, socket.SHUT_RD), (dst, socket.SHUT_WR)):
            try:
                sock.shutdown(how)
            except OSError:
                pass
def handle(client: socket.socket) -> None:
    """Proxy one client connection to the upstream CDP endpoint.

    Reads the request head from ``client``, rewrites its Host header to
    ``INTERNAL_HOST`` and forwards it (plus any bytes already read past the
    head) to ``TARGET_ADDR:TARGET_PORT``. A ``101`` response is relayed
    verbatim and the two sockets are then piped in both directions until
    either side closes. Any other response is buffered according to its
    Content-Length, has ``INTERNAL_HOST`` in the body replaced by the
    client's original Host value, and is sent back with a recomputed
    Content-Length and ``Connection: close``.

    Errors are logged to stderr; both sockets are always closed on exit.
    """
    upstream: socket.socket | None = None
    try:
        # Read until end-of-headers.
        head_buf = recv_until(client, b"\r\n\r\n")
        if b"\r\n\r\n" not in head_buf:
            return
        head, tail = head_buf.split(b"\r\n\r\n", 1)
        new_head, original_host = rewrite_host(head, INTERNAL_HOST)
        upstream = socket.create_connection((TARGET_ADDR, TARGET_PORT), timeout=5)
        # `create_connection(timeout=5)` sets the socket's timeout to 5s,
        # which then applies to all subsequent recv() calls too. After a WS
        # upgrade either side can stay silent for minutes — leave timeouts
        # off so the pipe doesn't blow up the connection on idle.
        upstream.settimeout(None)
        upstream.sendall(new_head + b"\r\n\r\n" + tail)
        # Read response headers from upstream.
        resp_head_buf = recv_until(upstream, b"\r\n\r\n")
        if b"\r\n\r\n" not in resp_head_buf:
            return
        resp_head, resp_tail = resp_head_buf.split(b"\r\n\r\n", 1)
        first_line = resp_head.split(b"\r\n", 1)[0].decode("latin-1", errors="replace")
        # Match any 101 status (Chrome's CDP says "101 WebSocket Protocol
        # Handshake", not the canonical "101 Switching Protocols"). Sniff the
        # status code from the first line, e.g. "HTTP/1.1 101 ...".
        parts = first_line.split(" ", 2)
        status_code = parts[1] if len(parts) >= 2 else ""
        if status_code == "101":
            # WS upgrade. Forward as-is and start raw pipe.
            client.sendall(resp_head + b"\r\n\r\n" + resp_tail)
            t1 = threading.Thread(target=pipe, args=(client, upstream), daemon=True)
            t2 = threading.Thread(target=pipe, args=(upstream, client), daemon=True)
            t1.start()
            t2.start()
            t1.join()
            t2.join()
            return
        # Regular HTTP response. Determine body length (Content-Length only —
        # CDP doesn't use chunked encoding for /json/* endpoints) and rewrite.
        content_length = 0
        for line in resp_head.split(b"\r\n"):
            if line.lower().startswith(b"content-length:"):
                try:
                    content_length = int(line.split(b":", 1)[1].strip())
                except ValueError:
                    pass
                break
        body = resp_tail
        while len(body) < content_length:
            chunk = upstream.recv(65536)
            if not chunk:
                break
            body += chunk
        # Truncate any extra bytes that came past content_length (shouldn't
        # happen with stock chrome but defensive against pipelined responses).
        if content_length and len(body) > content_length:
            body = body[:content_length]
        # Rewrite the URLs Chrome composed using its localhost Host so callers
        # can follow them back through this bridge. rewrite_host() decoded the
        # original Host as latin-1, so encode it the same way here; the
        # default UTF-8 would turn any byte >= 0x80 into a different sequence.
        if original_host:
            body = body.replace(
                INTERNAL_HOST.encode("latin-1"), original_host.encode("latin-1")
            )
        # Rebuild response headers: drop any existing Content-Length / Connection
        # header and force `Connection: close` + the new Content-Length. This
        # keeps the bridge one-request-per-connection (no keep-alive); avoids a
        # whole class of upstream/downstream desync issues, especially because
        # Node's ws library will open a fresh TCP for the WS upgrade rather
        # than trying to reuse the HTTP probe's connection.
        new_lines = []
        for line in resp_head.split(b"\r\n"):
            l = line.lower()
            if l.startswith(b"content-length:") or l.startswith(b"connection:"):
                continue
            new_lines.append(line)
        new_lines.append(f"Content-Length: {len(body)}".encode())
        new_lines.append(b"Connection: close")
        resp_head = b"\r\n".join(new_lines)
        client.sendall(resp_head + b"\r\n\r\n" + body)
    except Exception as e:
        sys.stderr.write(f"[cdp-bridge] handle error: {e}\n")
    finally:
        try:
            client.close()
        except OSError:
            pass
        if upstream is not None:
            try:
                upstream.close()
            except OSError:
                pass
def main() -> int:
    """Listen on LISTEN_ADDR:LISTEN_PORT and serve each connection in a thread.

    Every accepted connection is handed to ``handle`` on its own daemon
    thread. The accept loop has no exit condition, so this function only
    ends if an exception propagates out of it.
    """
    listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listener.bind((LISTEN_ADDR, LISTEN_PORT))
    listener.listen(64)
    # The two f-string fragments are concatenated, so the separator between
    # the listen and target addresses has to be spelled out explicitly.
    sys.stderr.write(
        f"[cdp-bridge] HTTP-aware proxy listening on {LISTEN_ADDR}:{LISTEN_PORT}"
        f" → {TARGET_ADDR}:{TARGET_PORT} (rewriting Host → {INTERNAL_HOST})\n"
    )
    while True:
        client, _ = listener.accept()
        threading.Thread(target=handle, args=(client,), daemon=True).start()


if __name__ == "__main__":
    sys.exit(main() or 0)

View file

@ -0,0 +1,19 @@
# noVNC sidecar image: x11vnc, the noVNC web client and websockify on Ubuntu 24.04.
FROM docker.io/library/ubuntu:24.04
# Install the VNC tooling without recommended extras and remove the apt
# package lists in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
    x11vnc \
    novnc \
    websockify \
    ca-certificates \
    && rm -rf /var/lib/apt/lists/*
# noVNC ships /usr/share/novnc/vnc.html; alias to index.html so / works.
RUN ln -sf /usr/share/novnc/vnc.html /usr/share/novnc/index.html
# Port that entrypoint.sh starts websockify on.
EXPOSE 6080
# The entrypoint starts x11vnc in the background and then execs websockify.
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
CMD ["/entrypoint.sh"]

View file

@ -0,0 +1,39 @@
#!/usr/bin/env bash
# Connect to the chrome-service container's Xvfb (shared pod network, TCP)
# and serve the noVNC HTML5 client + websockify bridge on :6080.
set -e

# port_open PORT: succeed if something accepts TCP connections on
# 127.0.0.1:PORT. The probe runs in a subshell whose stderr is discarded:
# bash applies redirections left to right, so with
# `echo > /dev/tcp/... 2>/dev/null` a refused connection is reported before
# `2>/dev/null` takes effect and the error ends up in the pod logs.
port_open() {
  (echo > "/dev/tcp/127.0.0.1/$1") 2>/dev/null
}

# Wait up to ~30s for Xvfb's TCP listener. Not fatal on timeout: if Xvfb is
# still down, x11vnc fails to start and the port check further down exits 1.
for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15; do
  if port_open 6099; then
    echo "Xvfb TCP up after attempt $i"
    break
  fi
  echo "waiting for Xvfb TCP 6099 attempt=$i"
  sleep 2
done

# websockify runs as PID 1; x11vnc is a child so its logs land on container stdout
# `-noshm` skips MIT-SHM probes that fail across container boundaries (each
# container has its own /dev/shm); `-noxdamage` skips XDAMAGE which Xvfb
# doesn't expose; `-quiet` keeps the polling chatter out of pod logs.
echo "starting x11vnc -> :5900"
x11vnc -display localhost:99 -nopw -listen 0.0.0.0 -rfbport 5900 \
  -forever -shared -noshm -noxdamage -quiet 2>&1 &
X11VNC_PID=$!

for i in 1 2 3 4 5 6 7 8 9 10; do
  if port_open 5900; then
    echo "x11vnc bound 5900 after attempt $i"
    break
  fi
  # Fail fast if x11vnc already exited instead of polling a dead process.
  if ! kill -0 "$X11VNC_PID" 2>/dev/null; then
    echo "ERROR: x11vnc exited before binding 5900"
    exit 1
  fi
  echo "waiting for x11vnc :5900 attempt=$i"
  sleep 2
done

if ! port_open 5900; then
  echo "ERROR: x11vnc did not bind 5900"
  exit 1
fi

echo "starting websockify -> :6080"
exec websockify --web=/usr/share/novnc 6080 localhost:5900

View file

@ -0,0 +1,69 @@
#!/usr/bin/env python3
"""Connect to chrome-service via CDP, dump storage state, write atomically.
Runs hourly as a Kubernetes CronJob. Mounts the chrome-service encrypted
PVC at /profile (same node via pod-affinity) and writes the snapshot to
/profile/snapshots/storage-state.json. The snapshot-server sidecar reads
from the same path and serves it bearer-gated.
CDP endpoint is plain HTTP — protection is the chrome-service
NetworkPolicy (allow only labelled client namespaces). Same security model
as the previous WS endpoint, just unauthenticated within the trust zone.
"""
import asyncio
import logging
import os
import pathlib
import sys
# Log at INFO and above, prefixing each record with a timestamp and level.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
log = logging.getLogger("snapshot-harvester")
# CDP HTTP endpoint to attach to; overridable via the CDP_URL env var.
CDP_URL = os.environ.get(
    "CDP_URL", "http://chrome-service.chrome-service.svc.cluster.local:9222"
)
# Snapshot directory (overridable via SNAPSHOT_DIR). The temp file lives next
# to the final file; main() writes it first and then os.replace()s it into place.
SNAPSHOT_DIR = pathlib.Path(os.environ.get("SNAPSHOT_DIR", "/profile/snapshots"))
SNAPSHOT_FILE = SNAPSHOT_DIR / "storage-state.json"
TMP_FILE = SNAPSHOT_DIR / "storage-state.json.tmp"
async def main() -> int:
    """Dump the first browser context's storage state to SNAPSHOT_FILE.

    Exit codes:
        0 - snapshot written
        2 - playwright is not importable
        3 - could not attach to the browser at CDP_URL
        4 - the browser reported no contexts
    """
    try:
        from playwright.async_api import async_playwright
    except ImportError:
        log.error("playwright not installed in image")
        return 2

    SNAPSHOT_DIR.mkdir(parents=True, exist_ok=True)

    async with async_playwright() as pw:
        try:
            browser = await pw.chromium.connect_over_cdp(CDP_URL, timeout=20_000)
        except Exception:
            log.exception("connect_over_cdp failed (%s)", CDP_URL)
            return 3

        try:
            if not browser.contexts:
                log.error("no browser contexts found — chrome-service may not have launched a persistent context yet")
                return 4
            # storage_state writes cookies + localStorage to a JSON file.
            # IndexedDB and sessionStorage are NOT included (known Playwright limitation).
            # Write to the temp path first, then rename over the live file so
            # readers never observe a partially written snapshot.
            await browser.contexts[0].storage_state(path=str(TMP_FILE))
            os.replace(TMP_FILE, SNAPSHOT_FILE)
            log.info(
                "wrote snapshot (%d bytes) to %s",
                SNAPSHOT_FILE.stat().st_size,
                SNAPSHOT_FILE,
            )
        finally:
            # Best-effort disconnect; a failure here must not mask the result.
            try:
                await browser.close()
            except Exception:
                pass
    return 0


if __name__ == "__main__":
    sys.exit(asyncio.run(main()))

View file

@ -0,0 +1,68 @@
#!/usr/bin/env python3
"""Tiny HTTP server that exposes /api/snapshot, gated by a bearer token.
Runs as a sidecar in the chrome-service pod. Reads the persisted storage
state written hourly by the snapshot-harvester CronJob and returns it to
authenticated callers (the dev-box `playwright-snapshot-refresh` timer).
Token is read from the PW_TOKEN env var, same secret the legacy WS path
used. The endpoint is mounted behind Traefik on `chrome.viktorbarzin.me`
at the `/api/snapshot` path (auth=none at the ingress; the bearer check
is here).
"""
import os
import sys
from http.server import HTTPServer, BaseHTTPRequestHandler
# Bearer token callers must present; None when PW_TOKEN is unset.
TOKEN = os.environ.get("PW_TOKEN")
# File served at /api/snapshot; overridable via SNAPSHOT_PATH.
SNAPSHOT_PATH = os.environ.get(
    "SNAPSHOT_PATH", "/profile/snapshots/storage-state.json"
)
# TCP port the server binds to; overridable via PORT.
PORT = int(os.environ.get("PORT", "8088"))
class Handler(BaseHTTPRequestHandler):
    """Serve ``/healthz`` (open) and ``/api/snapshot`` (bearer-gated).

    Responses:
        GET /healthz       -> 200 "ok"
        GET /api/snapshot  -> 503 when no token is configured,
                              401 when the Authorization header does not match,
                              404 when the snapshot file does not exist yet,
                              500 when the snapshot file cannot be read,
                              200 with the file contents otherwise
        anything else      -> 404
    """

    server_version = "chrome-snapshot/1"
    # Socket timeout (seconds) applied by StreamRequestHandler.setup(), so a
    # client that connects and then stalls cannot hold a connection forever.
    timeout = 30

    def _short(self, status: int, body: bytes = b"") -> None:
        """Send a minimal response with the given status and optional body."""
        self.send_response(status)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        if body:
            self.wfile.write(body)

    def do_GET(self):
        """Route GET requests; see the class docstring for the status codes."""
        import hmac

        if self.path == "/healthz":
            self._short(200, b"ok\n")
            return
        if self.path != "/api/snapshot":
            self._short(404)
            return
        # Treat an empty token like a missing one: otherwise the expected
        # header would be just "Bearer " and the endpoint would be guarded by
        # an empty credential.
        if not TOKEN:
            self._short(503, b"{\"error\":\"token not configured\"}\n")
            return
        supplied = self.headers.get("Authorization", "")
        expected = f"Bearer {TOKEN}"
        # Constant-time comparison so response timing does not reveal how
        # much of the token matched. Compare bytes: compare_digest rejects
        # non-ASCII str arguments.
        if not hmac.compare_digest(supplied.encode("utf-8"), expected.encode("utf-8")):
            self._short(401, b"{\"error\":\"invalid bearer\"}\n")
            return
        try:
            with open(SNAPSHOT_PATH, "rb") as f:
                data = f.read()
        except FileNotFoundError:
            self._short(404, b"{\"error\":\"snapshot not yet available\"}\n")
            return
        except OSError:
            # e.g. permission problems; answer cleanly instead of crashing
            # the handler mid-request.
            self._short(500, b"{\"error\":\"snapshot unreadable\"}\n")
            return
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.send_header("Cache-Control", "no-cache")
        self.send_header("Content-Length", str(len(data)))
        self.end_headers()
        self.wfile.write(data)

    def log_message(self, fmt, *args):
        """Write access-log lines to stderr with a fixed prefix."""
        sys.stderr.write(
            "[snapshot-server] %s - %s\n" % (self.address_string(), fmt % args)
        )
if __name__ == "__main__":
    # Serve each connection on its own thread: with the single-threaded
    # HTTPServer one slow or stalled client blocks every other request,
    # including /healthz.
    from http.server import ThreadingHTTPServer

    ThreadingHTTPServer(("0.0.0.0", PORT), Handler).serve_forever()

View file

@ -0,0 +1,54 @@
// Minimal stealth init script for Playwright-driven Chromium.
// Vendored from puppeteer-extra-plugin-stealth/evasions/* (MIT) — covers:
// webdriver, chrome.runtime, navigator.plugins, navigator.languages,
// Permissions.query, WebGL getParameter (vendor + renderer spoof).
// Run via context.add_init_script() so it executes before any page script.
(() => {
  // navigator.webdriver — most common detection, removed entirely.
  Object.defineProperty(Navigator.prototype, 'webdriver', { get: () => undefined });

  // window.chrome.runtime — many sites check that real Chrome exposes this.
  if (!window.chrome) window.chrome = {};
  window.chrome.runtime = window.chrome.runtime || {};

  // navigator.plugins — headless reports zero; spoof a plausible PDF viewer.
  Object.defineProperty(navigator, 'plugins', {
    get: () => [{ name: 'Chrome PDF Plugin' }, { name: 'Chrome PDF Viewer' }, { name: 'Native Client' }],
  });

  // navigator.languages — headless returns empty array.
  Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });

  // Permissions.query — headless returns 'denied' for notifications instead of 'default'.
  // The original method must be invoked with the Permissions object as
  // `this`; calling the detached function reference throws
  // "TypeError: Illegal invocation" for every non-notifications query.
  const permissions = window.navigator.permissions;
  const origQuery = permissions && permissions.query;
  if (origQuery) {
    permissions.query = (parameters) =>
      parameters && parameters.name === 'notifications'
        ? Promise.resolve({ state: Notification.permission })
        : origQuery.call(permissions, parameters);
  }

  // WebGL getParameter — spoof vendor + renderer strings to a real GPU.
  const spoofGl = (proto) => {
    if (!proto) return;
    const orig = proto.getParameter;
    proto.getParameter = function (parameter) {
      if (parameter === 37445) return 'Intel Inc.'; // UNMASKED_VENDOR_WEBGL
      if (parameter === 37446) return 'Intel Iris OpenGL Engine'; // UNMASKED_RENDERER_WEBGL
      return orig.apply(this, arguments);
    };
  };
  spoofGl(window.WebGLRenderingContext && window.WebGLRenderingContext.prototype);
  spoofGl(window.WebGL2RenderingContext && window.WebGL2RenderingContext.prototype);

  // disable-devtool.js (theajack/disable-devtool) auto-inits via a script
  // tag with `disable-devtool-auto`. Its Performance detector trips under
  // Playwright (CDP adds console.log latency vs console.table) and the
  // redirect URL is hard-coded — for hmembeds that's google.com.
  // Hide the auto-init marker so the library's IIFE exits early.
  const origQS = Document.prototype.querySelector;
  Document.prototype.querySelector = function (sel) {
    if (typeof sel === 'string' && sel.indexOf('disable-devtool-auto') !== -1) return null;
    return origQS.apply(this, arguments);
  };
})();

View file

@ -0,0 +1,833 @@
# Name of the TLS secret passed to both ingress_factory modules in this file.
variable "tls_secret_name" {
  type = string
  sensitive = true
}
# NFS server address supplied by the caller.
# NOTE(review): module.nfs_chrome_service_backup_host in this file sets a
# literal nfs_server instead of reading this variable — confirm that is intended.
variable "nfs_server" { type = string }
# Shared values for this stack: namespace name, pod labels, image pins and
# the snapshot directory on the profile volume.
locals {
  namespace = "chrome-service"
  labels = {
    app = "chrome-service"
  }
  # Pin to the same Playwright minor that the Python client requires.
  # If you bump this image, also bump `playwright==X.Y.Z` in callers'
  # requirements (currently f1-stream, snapshot-harvester) and re-run the
  # connect smoke test. Image ships chromium under /ms-playwright/.
  image = "mcr.microsoft.com/playwright:v1.48.0-noble"
  # Python image for the snapshot-harvester CronJob and the snapshot-server
  # sidecar (the latter just runs a 60-line stdlib HTTP server).
  python_image = "mcr.microsoft.com/playwright/python:v1.48.0-noble"
  # Directory created by the chrome-service entrypoint and served from by the
  # snapshot-server sidecar (via SNAPSHOT_PATH).
  snapshot_dir = "/profile/snapshots"
}
# --- Namespace ---
# Namespace for every resource in this stack.
resource "kubernetes_namespace" "chrome_service" {
  metadata {
    name = local.namespace
    labels = {
      "istio-injection" = "disabled"
      # NOTE(review): local.tiers is not defined in this file's locals block —
      # presumably provided by shared/generated configuration; confirm.
      tier = local.tiers.aux
      "chrome-service.viktorbarzin.me/server" = "true"
      "keel.sh/enrolled" = "true"
    }
  }
  lifecycle {
    # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
    ignore_changes = [metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"]]
  }
}
# --- Secrets (single-key extract: api_bearer_token) ---
# ExternalSecret that materialises the Kubernetes Secret
# `chrome-service-secrets` from the `chrome-service` key of the `vault-kv`
# ClusterSecretStore, refreshed every 15m. The deployment in this file reads
# only `api_bearer_token` from it.
# NOTE(review): `dataFrom.extract` presumably syncs every property stored
# under that key, not just api_bearer_token — confirm against the heading.
resource "kubernetes_manifest" "external_secret" {
  manifest = {
    apiVersion = "external-secrets.io/v1beta1"
    kind = "ExternalSecret"
    metadata = {
      name = "chrome-service-secrets"
      namespace = local.namespace
    }
    spec = {
      refreshInterval = "15m"
      secretStoreRef = {
        name = "vault-kv"
        kind = "ClusterSecretStore"
      }
      target = {
        name = "chrome-service-secrets"
      }
      dataFrom = [{
        extract = {
          key = "chrome-service"
        }
      }]
    }
  }
  depends_on = [kubernetes_namespace.chrome_service]
}
# tls-secret for the chrome.viktorbarzin.me ingress is auto-cloned into
# every namespace by Kyverno's `sync-tls-secret` ClusterPolicy — no local
# module call needed.
# --- Encrypted profile PVC ---
# Holds Chromium user data: cookies, localStorage, IndexedDB. Sites we
# drive may set auth tokens or session cookies — encrypted is correct.
resource "kubernetes_persistent_volume_claim" "profile_encrypted" {
  # Do not block the apply waiting for the claim to bind.
  wait_until_bound = false
  metadata {
    name = "chrome-service-profile-encrypted"
    namespace = kubernetes_namespace.chrome_service.metadata[0].name
    # Auto-resize settings read by the volume autoresizer (see the lifecycle
    # comment below): threshold, growth step and upper bound.
    annotations = {
      "resize.topolvm.io/threshold" = "10%"
      "resize.topolvm.io/increase" = "100%"
      "resize.topolvm.io/storage_limit" = "10Gi"
    }
  }
  spec {
    access_modes = ["ReadWriteOnce"]
    storage_class_name = "proxmox-lvm-encrypted"
    resources {
      requests = {
        storage = "2Gi"
      }
    }
  }
  lifecycle {
    # The autoresizer expands requests.storage up to storage_limit and
    # PVCs can't shrink. Without this, every TF apply tries to revert
    # to the spec value, K8s rejects the shrink, and the PVC ends up
    # in Terminating-but-in-use limbo.
    ignore_changes = [spec[0].resources[0].requests]
  }
}
# --- NFS backup target ---
# NFS volume for the profile backups, exported from /srv/nfs/chrome-service-backup.
# NOTE(review): nfs_server is a literal here while `variable "nfs_server"` is
# declared in this file — confirm the backup host is meant to differ from it.
module "nfs_chrome_service_backup_host" {
  source = "../../modules/kubernetes/nfs_volume"
  name = "chrome-service-backup-host"
  namespace = kubernetes_namespace.chrome_service.metadata[0].name
  nfs_server = "192.168.1.127"
  nfs_path = "/srv/nfs/chrome-service-backup"
}
# --- Deployment ---
# Single-replica pod with three containers sharing the encrypted profile PVC:
#   - chrome-service : Xvfb + headed Chromium + the cdp_bridge.py proxy (:9222)
#   - novnc          : noVNC web view of the Xvfb display (:6080)
#   - snapshot-server: bearer-gated HTTP endpoint for the storage snapshot (:8088)
# An init container fixes ownership of the profile volume first.
resource "kubernetes_deployment" "chrome_service" {
  metadata {
    name = "chrome-service"
    namespace = kubernetes_namespace.chrome_service.metadata[0].name
    labels = merge(local.labels, {
      tier = local.tiers.aux
      # Deliberate pin: chrome-service's playwright image MUST match
      # the playwright Python version in f1-stream (see local.image
      # comment above). Opt out of Keel auto-update via this label —
      # the inject-keel-annotations ClusterPolicy excludes workloads
      # selector-matching keel.sh/policy=never.
      "keel.sh/policy" = "never"
    })
    annotations = {
      "reloader.stakater.com/auto" = "true"
    }
  }
  spec {
    replicas = 1
    # Recreate (not rolling): the profile PVC is ReadWriteOnce, so the old
    # pod is stopped before the replacement starts.
    strategy {
      type = "Recreate"
    }
    selector {
      match_labels = local.labels
    }
    template {
      metadata {
        labels = local.labels
      }
      spec {
        # The noVNC sidecar pulls from registry.viktorbarzin.me which needs
        # auth. Kyverno's `sync-registry-credentials` ClusterPolicy syncs
        # the secret into every namespace.
        # NOTE(review): the novnc image below is pulled from
        # forgejo.viktorbarzin.me — confirm this secret covers that registry.
        image_pull_secrets {
          name = "registry-credentials"
        }
        security_context {
          run_as_user = 1000
          run_as_group = 1000
          fs_group = 1000
          seccomp_profile {
            type = "RuntimeDefault"
          }
        }
        # Fix profile dir ownership (PVC may have root-owned files from prior run).
        init_container {
          name = "fix-perms"
          image = "busybox:1.37"
          command = ["sh", "-c", "chown -R 1000:1000 /profile"]
          security_context {
            run_as_user = 0
          }
          volume_mount {
            name = "profile"
            mount_path = "/profile"
          }
          resources {
            requests = { memory = "32Mi" }
            limits = { memory = "64Mi" }
          }
        }
        container {
          name = "chrome-service"
          image = local.image
          image_pull_policy = "IfNotPresent"
          # Direct chromium launch (NOT `playwright launch-server`). Reason:
          # launch-server creates ephemeral browser contexts per `connect()`
          # call, so cookies/localStorage never persist to the PVC — the
          # `/profile` mount only ever held npm cache + fontconfig.
          # Replaced 2026-06-04 with a CDP+persistent-profile model so the
          # warm browser (where Viktor logs in via noVNC) keeps cookies, and
          # the hourly snapshot-harvester CronJob can dump them via the
          # CDP endpoint. Callers migrate `chromium.connect()` →
          # `chromium.connect_over_cdp()` (see f1-stream's playback_verifier).
          #
          # Flags actually passed in the script below:
          # --remote-debugging-port=9223 : TCP CDP on the internal port;
          #                                cdp_bridge.py fronts it on 9222.
          # --remote-allow-origins=* : Chrome 111+ requires for
          #                            non-loopback CDP origins.
          # --user-data-dir=/profile/chromium-data: persistent profile on
          #                                         the encrypted PVC.
          # (--remote-debugging-address is not passed; the script explains
          # that stock Chrome ignores it, hence the bridge.)
          command = ["bash", "-c"]
          args = [
            <<-EOT
            set -e
            # Locate chromium in the Microsoft image. The path is
            # /ms-playwright/chromium-XXXX/chrome-linux/chrome where XXXX
            # is the playwright-pinned build; resolve at runtime so a minor
            # bump of the image doesn't break the launch line.
            CHROMIUM=$(find /ms-playwright -maxdepth 4 -name 'chrome' -type f -executable -path '*/chrome-linux/*' 2>/dev/null | head -1)
            if [ -z "$CHROMIUM" ]; then
            echo "ERROR: chromium binary not found under /ms-playwright" >&2
            exit 1
            fi
            echo "[chrome-service] using chromium: $CHROMIUM"
            # -listen tcp enables localhost:6099 so the noVNC sidecar can
            # attach over the pod's shared network ns (Ubuntu 24.04
            # defaults Xvfb to -nolisten tcp). -ac disables X access
            # control; safe because Xvfb only listens on the pod's lo.
            Xvfb :99 -screen 0 1280x720x24 -listen tcp -ac &
            sleep 1
            mkdir -p /profile/chromium-data ${local.snapshot_dir}
            # Why a bridge?
            # Stock Chrome binaries silently ignore --remote-debugging-address
            # (the flag is gated by a build-time switch most distributions don't
            # set), so CDP always binds 127.0.0.1:<port> regardless of what we
            # pass. The K8s liveness/readiness probe + cluster callers reach
            # the pod via its pod-IP, never localhost.
            # Fix: chromium listens on 127.0.0.1:9223 (hidden internal port),
            # cdp_bridge.py listens on 0.0.0.0:9222 (the public CDP port) and
            # transparently forwards. K8s Service, probes, NetworkPolicy all
            # stay on 9222 no caller-side changes needed.
            # (Microsoft playwright image ships python3 but not socat, so the
            # bridge is a tiny stdlib script see files/cdp_bridge.py.)
            python3 /scripts/cdp_bridge.py &
            BRIDGE_PID=$!
            trap "kill $BRIDGE_PID 2>/dev/null" EXIT
            exec "$CHROMIUM" \
            --remote-debugging-port=9223 \
            --remote-allow-origins=* \
            --user-data-dir=/profile/chromium-data \
            --no-sandbox \
            --no-first-run \
            --no-default-browser-check \
            --disable-blink-features=AutomationControlled \
            --disable-features=IsolateOrigins,site-per-process \
            --autoplay-policy=no-user-gesture-required \
            --disable-dev-shm-usage \
            --password-store=basic \
            --use-mock-keychain \
            about:blank
          EOT
          ]
          env {
            name = "DISPLAY"
            value = ":99"
          }
          env {
            name = "HOME"
            value = "/profile"
          }
          port {
            name = "cdp"
            container_port = 9222
            protocol = "TCP"
          }
          # Chrome's CDP endpoint serves /json/version once it's bound;
          # TCP-open is enough for readiness.
          # NOTE(review): 9222 is the cdp_bridge.py listener, not Chromium
          # itself (which is on 9223), so these TCP probes show the bridge is
          # accepting connections — confirm that is sufficient.
          liveness_probe {
            tcp_socket { port = 9222 }
            initial_delay_seconds = 30
            period_seconds = 30
            failure_threshold = 3
          }
          readiness_probe {
            tcp_socket { port = 9222 }
            initial_delay_seconds = 10
            period_seconds = 10
          }
          startup_probe {
            tcp_socket { port = 9222 }
            period_seconds = 5
            failure_threshold = 24 # up to 2 minutes
          }
          volume_mount {
            name = "profile"
            mount_path = "/profile"
          }
          volume_mount {
            name = "dshm"
            mount_path = "/dev/shm"
          }
          # /scripts/cdp_bridge.py provides the 0.0.0.0:9222 → 127.0.0.1:9223
          # HTTP-aware forwarder (see entrypoint comment above for why).
          volume_mount {
            name = "scripts"
            mount_path = "/scripts"
            read_only = true
          }
          resources {
            requests = {
              cpu = "200m"
              memory = "1500Mi"
            }
            limits = {
              memory = "2Gi"
            }
          }
        }
        # noVNC sidecar — exposes a live HTML5 view of the headed Chromium
        # session via x11vnc + websockify, gated by the Authentik-protected
        # ingress at chrome.viktorbarzin.me. CDP port 9222 (the new
        # Playwright endpoint) stays internal-only.
        container {
          name = "novnc"
          # Phase 3 cutover 2026-05-07 — Forgejo registry consolidation.
          image = "forgejo.viktorbarzin.me/viktor/chrome-service-novnc:v4"
          image_pull_policy = "IfNotPresent"
          port {
            name = "http"
            container_port = 6080
            protocol = "TCP"
          }
          # x11vnc connects to the chrome-service container's Xvfb over
          # localhost TCP (shared pod network). Same uid 1000 as chrome
          # container so we can read MIT-MAGIC-COOKIE if Xvfb adds one.
          resources {
            requests = { cpu = "10m", memory = "32Mi" }
            limits = { memory = "96Mi" }
          }
        }
        # snapshot-server sidecar — serves the hourly storage-state.json
        # snapshot (written by the snapshot-harvester CronJob to the same
        # PVC) over an HTTP endpoint, bearer-gated by PW_TOKEN. Mounted
        # behind Traefik at chrome.viktorbarzin.me/api/snapshot with
        # auth=none; the bearer check inside this server is the gate.
        # Source: files/snapshot_server.py — 60 lines, stdlib only.
        container {
          name = "snapshot-server"
          image = local.python_image
          image_pull_policy = "IfNotPresent"
          command = ["python3", "/scripts/snapshot_server.py"]
          env {
            name = "PW_TOKEN"
            value_from {
              secret_key_ref {
                name = "chrome-service-secrets"
                key = "api_bearer_token"
              }
            }
          }
          env {
            name = "SNAPSHOT_PATH"
            value = "${local.snapshot_dir}/storage-state.json"
          }
          env {
            name = "PORT"
            value = "8088"
          }
          port {
            name = "snap"
            container_port = 8088
            protocol = "TCP"
          }
          liveness_probe {
            http_get {
              path = "/healthz"
              port = 8088
            }
            initial_delay_seconds = 5
            period_seconds = 30
          }
          readiness_probe {
            http_get {
              path = "/healthz"
              port = 8088
            }
            initial_delay_seconds = 2
            period_seconds = 10
          }
          # The sidecar only reads the snapshot, so the profile is mounted read-only.
          volume_mount {
            name = "profile"
            mount_path = "/profile"
            read_only = true
          }
          volume_mount {
            name = "scripts"
            mount_path = "/scripts"
            read_only = true
          }
          resources {
            requests = { cpu = "5m", memory = "32Mi" }
            limits = { memory = "96Mi" }
          }
        }
        volume {
          name = "profile"
          persistent_volume_claim {
            claim_name = kubernetes_persistent_volume_claim.profile_encrypted.metadata[0].name
          }
        }
        # Memory-backed /dev/shm for the chrome-service container.
        volume {
          name = "dshm"
          empty_dir {
            medium = "Memory"
            size_limit = "256Mi"
          }
        }
        volume {
          name = "scripts"
          config_map {
            name = kubernetes_config_map_v1.snapshot_scripts.metadata[0].name
            default_mode = "0555"
          }
        }
      }
    }
  }
  lifecycle {
    # NOTE(review): the image entries below make Terraform ignore image
    # changes on the first two containers and the init container, so bumping
    # local.image would not roll out through an apply — confirm this is
    # intended given the "keel.sh/policy = never" pin above.
    ignore_changes = [
      spec[0].template[0].spec[0].dns_config, # KYVERNO_LIFECYCLE_V1
      metadata[0].annotations["keel.sh/policy"],
      metadata[0].annotations["keel.sh/trigger"],
      metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2
      metadata[0].annotations["keel.sh/match-tag"],
      spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE — Keel manages tag updates
      spec[0].template[0].spec[0].container[1].image,
      spec[0].template[0].spec[0].init_container[0].image,
      metadata[0].annotations["kubernetes.io/change-cause"],
      metadata[0].annotations["deployment.kubernetes.io/revision"],
      spec[0].template[0].metadata[0].annotations["keel.sh/update-time"], # KEEL_LIFECYCLE_V1
    ]
  }
}
# --- ConfigMap: sidecar + harvester scripts ---
# Ships the Python helpers from files/ into the pod; the deployment mounts
# this ConfigMap read-only at /scripts.
resource "kubernetes_config_map_v1" "snapshot_scripts" {
  metadata {
    name = "snapshot-scripts"
    namespace = kubernetes_namespace.chrome_service.metadata[0].name
    labels = local.labels
  }
  data = {
    "snapshot_server.py" = file("${path.module}/files/snapshot_server.py")
    "snapshot_harvester.py" = file("${path.module}/files/snapshot_harvester.py")
    # Small HTTP-aware proxy used by chrome-service container to bridge
    # 0.0.0.0:9222 → 127.0.0.1:9223 (Chromium silently ignores
    # --remote-debugging-address on stock builds; see cdp_bridge.py).
    "cdp_bridge.py" = file("${path.module}/files/cdp_bridge.py")
  }
}
# --- Services ---
# CDP endpoint (internal only, gated by NetworkPolicy). 2026-06-04: switched
# from Playwright WS (:3000) to direct chromium CDP (:9222) so the persistent
# user-data-dir actually persists cookies; callers use `connect_over_cdp()`.
# Port 9222 on the pod is the cdp_bridge.py listener.
resource "kubernetes_service" "chrome_service" {
  metadata {
    name = "chrome-service"
    namespace = kubernetes_namespace.chrome_service.metadata[0].name
    labels = local.labels
  }
  spec {
    selector = local.labels
    port {
      name = "cdp"
      port = 9222
      target_port = 9222
      protocol = "TCP"
    }
  }
}
# noVNC view (Authentik-gated, exposed via ingress).
# Maps service port 80 to the novnc container's port 6080.
resource "kubernetes_service" "chrome_novnc" {
  metadata {
    name = "chrome"
    namespace = kubernetes_namespace.chrome_service.metadata[0].name
    labels = local.labels
  }
  spec {
    selector = local.labels
    port {
      name = "http"
      port = 80
      target_port = 6080
      protocol = "TCP"
    }
  }
}
# Snapshot-server endpoint (bearer-gated, exposed via ingress sub-path
# chrome.viktorbarzin.me/api/snapshot — auth=none at the ingress layer
# because the bearer check happens inside snapshot_server.py).
resource "kubernetes_service" "chrome_snapshot" {
  metadata {
    name = "chrome-snapshot"
    namespace = kubernetes_namespace.chrome_service.metadata[0].name
    labels = local.labels
  }
  spec {
    selector = local.labels
    port {
      name = "snap"
      port = 8088
      target_port = 8088
      protocol = "TCP"
    }
  }
}
# Authentik-gated ingress for the noVNC view, with homepage dashboard annotations.
module "ingress" {
  source = "../../modules/kubernetes/ingress_factory"
  dns_type = "proxied"
  namespace = kubernetes_namespace.chrome_service.metadata[0].name
  name = "chrome"
  tls_secret_name = var.tls_secret_name
  auth = "required"
  # noVNC defaults to /vnc.html — auto-redirect / there.
  ingress_path = ["/"]
  extra_annotations = {
    "gethomepage.dev/enabled" = "true"
    "gethomepage.dev/name" = "Chrome Service"
    "gethomepage.dev/description" = "Live noVNC view of headed Chromium"
    "gethomepage.dev/icon" = "chromium.png"
    "gethomepage.dev/group" = "Infrastructure"
  }
}
# Second ingress on the same host (chrome.viktorbarzin.me) carving out
# /api/snapshot to the snapshot-server sidecar. Path-level carve-out
# pattern — see CLAUDE.md "For path-level carve-outs (e.g. wrongmove has
# `/` behind Anubis but `/api` direct), declare a second ingress_factory
# with `ingress_path = ["/<path>"]` pointing at the bare backend service."
module "ingress_snapshot" {
  source = "../../modules/kubernetes/ingress_factory"
  # auth = "none": bearer-token gated inside snapshot_server.py; Authentik
  # forward-auth would require an OIDC cookie that the dev-box refresh
  # timer can't replay.
  auth = "none"
  dns_type = "none" # DNS already created by module.ingress
  namespace = kubernetes_namespace.chrome_service.metadata[0].name
  name = "chrome-snapshot"
  host = "chrome"
  service_name = kubernetes_service.chrome_snapshot.metadata[0].name
  port = 8088
  ingress_path = ["/api/snapshot"]
  tls_secret_name = var.tls_secret_name
  # Keep this API-only ingress off the homepage dashboard.
  extra_annotations = {
    "gethomepage.dev/enabled" = "false"
  }
}
# --- NetworkPolicy: scoped ingress.
# - TCP/9222 (Chromium CDP): only from labelled client namespaces.
# - TCP/6080 (noVNC HTTP+WS): only from the traefik namespace (public path
#   is chrome.viktorbarzin.me → Traefik → sidecar; Authentik forward-auth
#   gates external access at the Traefik layer).
# - TCP/8088 (snapshot-server): only from the traefik namespace
#   (chrome.viktorbarzin.me/api/snapshot → Traefik → sidecar; bearer token
#   is the gate inside snapshot_server.py).
# The cluster has no default-deny, so this NP only takes effect inside
# chrome-service ns — pods elsewhere remain unaffected.
resource "kubernetes_network_policy_v1" "ws_ingress" {
  metadata {
    name = "chrome-service-ws-ingress"
    namespace = kubernetes_namespace.chrome_service.metadata[0].name
  }
  spec {
    # Applies to pods carrying the chrome-service app label.
    pod_selector {
      match_labels = local.labels
    }
    policy_types = ["Ingress"]
    # Rule 1: CDP (9222) from client namespaces.
    ingress {
      from {
        namespace_selector {
          match_labels = {
            "chrome-service.viktorbarzin.me/client" = "true"
          }
        }
      }
      # Explicit fallback list — admit f1-stream by name in case the label
      # is removed by accident. Keep this in sync with the labels above.
      from {
        namespace_selector {
          match_labels = {
            "kubernetes.io/metadata.name" = "f1-stream"
          }
        }
      }
      # Also admit chrome-service's own namespace (the snapshot-harvester
      # CronJob runs here and needs to reach the CDP endpoint).
      from {
        namespace_selector {
          match_labels = {
            "kubernetes.io/metadata.name" = "chrome-service"
          }
        }
      }
      ports {
        port = "9222"
        protocol = "TCP"
      }
    }
    # Rule 2: noVNC (6080) and snapshot-server (8088) from the traefik namespace.
    ingress {
      from {
        namespace_selector {
          match_labels = {
            "kubernetes.io/metadata.name" = "traefik"
          }
        }
      }
      ports {
        port = "6080"
        protocol = "TCP"
      }
      ports {
        port = "8088"
        protocol = "TCP"
      }
    }
  }
}
# --- Backup CronJob: tar+gzip the profile every 6h, 30-day retention. ---
resource "kubernetes_cron_job_v1" "chrome_service_backup" {
# CronJob that archives the profile volume into the backup volume and prunes
# archives older than 30 days.
metadata {
name = "chrome-service-backup"
namespace = kubernetes_namespace.chrome_service.metadata[0].name
}
spec {
# "Replace": a still-running job is replaced when the next schedule fires.
concurrency_policy = "Replace"
failed_jobs_history_limit = 3
successful_jobs_history_limit = 1
# Minute 47 of every 6th hour.
schedule = "47 */6 * * *"
starting_deadline_seconds = 60
job_template {
metadata {}
spec {
backoff_limit = 2
ttl_seconds_after_finished = 300
template {
metadata {}
spec {
# PVC is RWO — colocate the backup pod with the chrome-service
# pod so both can mount the volume on the same node.
affinity {
pod_affinity {
required_during_scheduling_ignored_during_execution {
label_selector {
match_labels = local.labels
}
topology_key = "kubernetes.io/hostname"
}
}
}
container {
name = "backup"
image = "docker.io/library/alpine:3.20"
# `$${ts}` is the Terraform escape for a literal shell `${ts}`.
# The archive name has hour resolution, so a retry within the same
# hour writes to the same file name.
# NOTE(review): tar writes straight to the final name; a failed run
# may leave a truncated archive behind — confirm whether that matters.
command = ["/bin/sh", "-c", <<-EOT
set -euxo pipefail
ts=$(date +"%Y_%m_%d_%H")
tar -czf /backup/$${ts}.tar.gz -C /profile .
find /backup -maxdepth 1 -type f -name '*.tar.gz' -mtime +30 -delete
echo "Backup complete: $${ts}.tar.gz"
EOT
]
# Source data is mounted read-only; only /backup is writable.
volume_mount {
name = "profile"
mount_path = "/profile"
read_only = true
}
volume_mount {
name = "backup"
mount_path = "/backup"
}
resources {
requests = { cpu = "10m", memory = "32Mi" }
limits = { memory = "64Mi" }
}
}
volume {
name = "profile"
persistent_volume_claim {
claim_name = kubernetes_persistent_volume_claim.profile_encrypted.metadata[0].name
}
}
volume {
name = "backup"
persistent_volume_claim {
claim_name = module.nfs_chrome_service_backup_host.claim_name
}
}
restart_policy = "OnFailure"
}
}
}
}
}
lifecycle {
# KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
}
}
# --- Snapshot harvester CronJob: hourly storage_state() dump via CDP ---
# Connects to the live chrome-service CDP endpoint, accesses the
# persistent default browser context (where Viktor's noVNC logins live),
# and writes cookies + localStorage to /profile/snapshots/storage-state.json
# (atomic rename). The snapshot-server sidecar reads from the same file.
resource "kubernetes_cron_job_v1" "chrome_service_snapshot_harvester" {
# Hourly CronJob that runs /scripts/snapshot_harvester.py against the CDP
# endpoint given in CDP_URL, with the profile PVC mounted read-write.
metadata {
name = "chrome-service-snapshot-harvester"
namespace = kubernetes_namespace.chrome_service.metadata[0].name
}
spec {
concurrency_policy = "Replace"
failed_jobs_history_limit = 3
successful_jobs_history_limit = 1
# Hourly, offset from the backup CronJob (which runs at :47 every 6h)
# so they don't fight for the encrypted PVC at the same minute.
schedule = "23 * * * *"
starting_deadline_seconds = 60
job_template {
metadata {}
spec {
backoff_limit = 2
ttl_seconds_after_finished = 300
template {
metadata {}
spec {
# PVC is RWO — colocate with the chrome-service pod.
affinity {
pod_affinity {
required_during_scheduling_ignored_during_execution {
label_selector {
match_labels = local.labels
}
topology_key = "kubernetes.io/hostname"
}
}
}
container {
name = "harvester"
image = local.python_image
image_pull_policy = "IfNotPresent"
# The Microsoft playwright/python image ships only browsers +
# Python — the `playwright` pip package itself is NOT installed
# (it's meant for CI that brings its own requirements). We
# install at startup, caching to the PVC so subsequent runs
# are near-instant.
# NOTE(review): only pip's download cache (PIP_CACHE_DIR) lives on the
# PVC; the installed package presumably does not persist between job
# pods, so `pip install` likely still runs each time — confirm.
command = ["bash", "-c"]
args = [
<<-EOT
set -e
export PIP_CACHE_DIR=/profile/.cache/pip
export PIP_DISABLE_PIP_VERSION_CHECK=1
python3 -c 'import playwright' 2>/dev/null \
|| pip install --quiet --no-warn-script-location playwright==1.48.0
exec python3 /scripts/snapshot_harvester.py
EOT
]
env {
name = "CDP_URL"
value = "http://chrome-service.chrome-service.svc.cluster.local:9222"
}
env {
name = "SNAPSHOT_DIR"
value = local.snapshot_dir
}
# Don't try to download browsers — connect_over_cdp doesn't
# need them locally.
env {
name = "PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD"
value = "1"
}
# Writable: the pip cache is placed under /profile/.cache/pip.
volume_mount {
name = "profile"
mount_path = "/profile"
}
volume_mount {
name = "scripts"
mount_path = "/scripts"
read_only = true
}
resources {
requests = { cpu = "20m", memory = "128Mi" }
limits = { memory = "512Mi" }
}
}
volume {
name = "profile"
persistent_volume_claim {
claim_name = kubernetes_persistent_volume_claim.profile_encrypted.metadata[0].name
}
}
# Scripts come from a ConfigMap, mounted read+execute (0555).
volume {
name = "scripts"
config_map {
name = kubernetes_config_map_v1.snapshot_scripts.metadata[0].name
default_mode = "0555"
}
}
restart_policy = "OnFailure"
}
}
}
}
}
lifecycle {
# KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
}
}

View file

@ -0,0 +1,8 @@
include "root" {
# Pull in the nearest Terragrunt configuration found in a parent directory.
path = find_in_parent_folders()
}
dependency "platform" {
# Declares a dependency on the sibling ../platform stack.
config_path = "../platform"
# Outputs of the platform stack are not read by this configuration.
skip_outputs = true
}

73
stacks/city-guesser/.terraform.lock.hcl generated Normal file
View file

@ -0,0 +1,73 @@
# This file is maintained automatically by "terraform init".
# Manual edits may be lost in future updates.
provider "registry.terraform.io/cloudflare/cloudflare" {
version = "4.52.7"
constraints = "~> 4.0"
hashes = [
"h1:pPItIWii5oymR+geZB219ROSPuSODPLTlM4S/u8xLvM=",
"zh:0c904ce31a4c6c4a5b3bf7ff1560e77c0cc7e2450c8553ded8e8c90398e1418b",
"zh:36183d310c36373fe4cb936b83c595c6fd3b0a94bc7827f28e5789ccbf59752e",
"zh:556a568a6f0235e8f41647de9e4d3a1e7b1d6502df8b19b54ec441f1c653ea10",
"zh:633ebbd5b0245e75e500ef9be4d9e62288f97e8da3baaa51323892a786d90285",
"zh:6acfe60cf52a65ba8f044f748548d2119e7f4fd7f8ebcb14698960d87c68f529",
"zh:890df766e9b839623b1f0437355032a3c006226a6c200cd911e15ee1a9014e9f",
"zh:904acc31ebb9d6ef68c792074b30532ee61bf515f19e0a3c75b46f126cca1f13",
"zh:a1d0a81246afc8750286d3f6fe7a8fbe6460dd2662407b28dbfbabb612e5fa9d",
"zh:a41a36fe253fc365fe2b7ffc749624688b2693b4634862fda161179ab100029f",
"zh:a7ef269e77ffa8715c8945a2c14322c7ff159ea44c15f62505f3cbb2cae3b32d",
"zh:b01aa3bed30610633b762df64332b26f8844a68c3960cebcb30f04918efc67fe",
"zh:b069cc2cd18cae10757df3ae030508eac8d55de7e49eda7a5e3e11f2f7fe6455",
"zh:b2d2c6313729ebb7465dceece374049e2d08bda34473901be9ff46a8836d42b2",
"zh:db0e114edaf4bc2f3d4769958807c83022bfbc619a00bdf4c4bd17faa4ab2d8b",
"zh:ecc0aa8b9044f664fd2aaf8fa992d976578f78478980555b4b8f6148e8d1a5fe",
]
}
provider "registry.terraform.io/hashicorp/helm" {
version = "3.1.1"
hashes = [
"h1:47CqNwkxctJtL/N/JuEj+8QMg8mRNI/NWeKO5/ydfZU=",
"h1:5b2ojWKT0noujHiweCds37ZreRFRQLNaErdJLusJN88=",
"zh:1a6d5ce931708aec29d1f3d9e360c2a0c35ba5a54d03eeaff0ce3ca597cd0275",
"zh:3411919ba2a5941801e677f0fea08bdd0ae22ba3c9ce3309f55554699e06524a",
"zh:81b36138b8f2320dc7f877b50f9e38f4bc614affe68de885d322629dd0d16a29",
"zh:95a2a0a497a6082ee06f95b38bd0f0d6924a65722892a856cfd914c0d117f104",
"zh:9d3e78c2d1bb46508b972210ad706dd8c8b106f8b206ecf096cd211c54f46990",
"zh:a79139abf687387a6efdbbb04289a0a8e7eaca2bd91cdc0ce68ea4f3286c2c34",
"zh:aaa8784be125fbd50c48d84d6e171d3fb6ef84a221dbc5165c067ce05faab4c8",
"zh:afecd301f469975c9d8f350cc482fe656e082b6ab0f677d1a816c3c615837cc1",
"zh:c54c22b18d48ff9053d899d178d9ffef7d9d19785d9bf310a07d648b7aac075b",
"zh:db2eefd55aea48e73384a555c72bac3f7d428e24147bedb64e1a039398e5b903",
"zh:ee61666a233533fd2be971091cecc01650561f1585783c381b6f6e8a390198a4",
"zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c",
]
}
provider "registry.terraform.io/hashicorp/kubernetes" {
version = "3.1.0"
hashes = [
"h1:oodIAuFMikXNmEtil5MQgP4dfSctUBYQiGJfjbsF3NY=",
]
}
provider "registry.terraform.io/hashicorp/vault" {
version = "4.8.0"
constraints = "~> 4.0"
hashes = [
"h1:GPfhH6dr1LY0foPBDYv9bEGifx7eSwYqFcEAOWOUxLk=",
"h1:aHqgWQhDBMeZO9iUKwJYMlh4q+xNMUlMIcjRbF4d02Y=",
"zh:269ab13433f67684012ae7e15876532b0312f5d0d2002a9cf9febb1279ce5ea6",
"zh:4babc95bf0c40eb85005db1dc2ca403c46be4a71dd3e409db3711a56f7a5ca0e",
"zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3",
"zh:86e27c1c625ecc24446a11eeffc3ac319b36c2b4e51251db8579256a0dbcf136",
"zh:a32f31da94824009e26b077374440b52098aecb93c92ff55dc3d31dd37c4ea25",
"zh:be0a18c6c0425518bab4fbffd82078b82036a88503b5d76064de551c9f646cbf",
"zh:be5a77fdfd36863ebeec79cd12b1d13322ffad6821d157a0b279789fa06b5937",
"zh:be8317d142a3caad74c7d936039ae27076a1b2b8312ef5208e2871a5f525977c",
"zh:c94a84895a3d9954b80e983eed4603330a5cdbbd8eef5b3c99278c2d1402ef3c",
"zh:de1fb712784dd8415f011ca5346a34f87fab6046c730557615247e511dbc7d98",
"zh:e3eafae7da550f86cae395d6660b2a0e93ec8d2b0e0e5ef982ec762e961fc952",
"zh:ff35fb1ab6add288f0f368981e56f780b50405accd1937131cba1137999c8d83",
]
}

121
stacks/city-guesser/main.tf Normal file
View file

@ -0,0 +1,121 @@
variable "tls_secret_name" {
  # Forwarded to module.tls_secret and module.ingress; marked sensitive so
  # the value is redacted in plan/apply output.
  sensitive = true
  type      = string
}
resource "kubernetes_namespace" "city-guesser" {
  lifecycle {
    # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
    ignore_changes = [metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"]]
  }

  metadata {
    name = "city-guesser"
    labels = {
      tier               = local.tiers.aux
      "istio-injection"  = "disabled"
      "keel.sh/enrolled" = "true"
    }
  }
}
module "tls_secret" {
  source = "../../modules/kubernetes/setup_tls_secret"
  # Reference the namespace resource instead of repeating the literal name:
  # the value is identical, but the reference makes Terraform create the
  # namespace before anything in this module.
  namespace       = kubernetes_namespace.city-guesser.metadata[0].name
  tls_secret_name = var.tls_secret_name
}
resource "kubernetes_deployment" "city-guesser" {
  # Single-replica Deployment of the city-guesser image, serving on port 80.
  metadata {
    name = "city-guesser"
    # Reference (not a literal) so the Deployment is ordered after the
    # namespace resource; the resulting value is still "city-guesser".
    namespace = kubernetes_namespace.city-guesser.metadata[0].name
    labels = {
      run  = "city-guesser"
      tier = local.tiers.aux
    }
  }
  spec {
    replicas = 1
    selector {
      match_labels = {
        run = "city-guesser"
      }
    }
    template {
      metadata {
        # Must match spec.selector.match_labels above.
        labels = {
          run = "city-guesser"
        }
      }
      spec {
        container {
          image = "viktorbarzin/city-guesser:latest"
          name  = "city-guesser"
          resources {
            limits = {
              memory = "64Mi"
            }
            requests = {
              cpu    = "10m"
              memory = "64Mi"
            }
          }
          port {
            container_port = 80
          }
        }
      }
    }
  }
  lifecycle {
    # Fields below are written by in-cluster controllers (Kyverno, Keel, the
    # Deployment controller); ignoring them keeps plans free of churn.
    ignore_changes = [
      spec[0].template[0].spec[0].dns_config,         # KYVERNO_LIFECYCLE_V1
      spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE — Keel manages tag updates
      metadata[0].annotations["keel.sh/policy"],
      metadata[0].annotations["keel.sh/trigger"],
      metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2
      metadata[0].annotations["keel.sh/match-tag"],
      metadata[0].annotations["kubernetes.io/change-cause"],
      metadata[0].annotations["deployment.kubernetes.io/revision"],
      spec[0].template[0].metadata[0].annotations["keel.sh/update-time"], # KEEL_LIFECYCLE_V1
    ]
  }
}
resource "kubernetes_service" "city-guesser" {
  # Service exposing port 80 of the pods labelled run=city-guesser.
  metadata {
    name = "city-guesser"
    # Reference (not a literal) so the Service is ordered after the
    # namespace resource; the resulting value is still "city-guesser".
    namespace = kubernetes_namespace.city-guesser.metadata[0].name
    labels = {
      "run" = "city-guesser"
    }
  }
  spec {
    selector = {
      run = "city-guesser"
    }
    port {
      name        = "http"
      port        = "80"
      target_port = "80"
    }
  }
}
module "ingress" {
  source   = "../../modules/kubernetes/ingress_factory"
  dns_type = "proxied"
  # Reference (not a literal) so the ingress is ordered after the namespace
  # resource; the resulting value is still "city-guesser".
  namespace       = kubernetes_namespace.city-guesser.metadata[0].name
  name            = "city-guesser"
  tls_secret_name = var.tls_secret_name
  auth            = "required"
  # Homepage dashboard metadata attached to the ingress.
  extra_annotations = {
    "gethomepage.dev/enabled"      = "true"
    "gethomepage.dev/name"         = "City Guesser"
    "gethomepage.dev/description"  = "Geography game"
    "gethomepage.dev/icon"         = "mdi-earth"
    "gethomepage.dev/group"        = "Other"
    "gethomepage.dev/pod-selector" = ""
  }
}

View file

@ -0,0 +1,33 @@
# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa
terraform {
required_providers {
vault = {
source = "hashicorp/vault"
version = "~> 4.0"
}
cloudflare = {
source = "cloudflare/cloudflare"
version = "~> 4"
}
}
}
variable "kube_config_path" {
type = string
default = "~/.kube/config"
}
provider "kubernetes" {
config_path = var.kube_config_path
}
provider "helm" {
kubernetes = {
config_path = var.kube_config_path
}
}
provider "vault" {
address = "https://vault.viktorbarzin.me"
skip_child_token = true
}

1
stacks/city-guesser/secrets Symbolic link
View file

@ -0,0 +1 @@
../../secrets

View file

@ -0,0 +1,8 @@
include "root" {
# Pull in the nearest Terragrunt configuration found in a parent directory.
path = find_in_parent_folders()
}
dependency "platform" {
# Declares a dependency on the sibling ../platform stack.
config_path = "../platform"
# Outputs of the platform stack are not read by this configuration.
skip_outputs = true
}

View file

@ -0,0 +1,870 @@
data "vault_kv_secret_v2" "secrets" {
  # Service-specific KV entry; read by kubernetes_secret.claude_credentials.
  name  = "claude-agent-service"
  mount = "secret"
}
data "vault_kv_secret_v2" "viktor_secrets" {
  # KV entry "viktor"; read by kubernetes_secret.sops_age_key.
  name  = "viktor"
  mount = "secret"
}
locals {
  # Namespace every resource in this stack is placed in.
  namespace = "claude-agent"

  # Labels shared by the Deployment, its pod template and the Service selector.
  labels = {
    app = "claude-agent-service"
  }

  # Phase 3 cutover 2026-05-07 — see infra/docs/plans/2026-05-07-forgejo-registry-consolidation-plan.md.
  image     = "forgejo.viktorbarzin.me/viktor/claude-agent-service"
  image_tag = "latest"
}
# --- Namespace ---
resource "kubernetes_namespace" "claude_agent" {
  lifecycle {
    # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
    ignore_changes = [metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"]]
  }

  metadata {
    name = local.namespace
    labels = {
      "resource-governance/custom-quota"      = "true"
      "resource-governance/custom-limitrange" = "true"
      tier                                    = local.tiers.aux
    }
  }
}
# --- Secrets ---
resource "kubernetes_manifest" "external_secret" {
# ExternalSecret that materialises the `claude-agent-secrets` Secret from the
# `vault-kv` ClusterSecretStore. Each `data` entry maps one Vault key/property
# to one key of the target Secret.
manifest = {
apiVersion = "external-secrets.io/v1beta1"
kind = "ExternalSecret"
metadata = {
name = "claude-agent-secrets"
namespace = local.namespace
}
spec = {
refreshInterval = "15m"
secretStoreRef = {
name = "vault-kv"
kind = "ClusterSecretStore"
}
target = {
name = "claude-agent-secrets"
}
data = [
{
secretKey = "GITHUB_TOKEN"
remoteRef = {
key = "viktor"
property = "github_pat"
}
},
{
# Forgejo push token for opening PRs on forgejo.viktorbarzin.me
# (exec agent uses the Forgejo API via curl + $FORGEJO_TOKEN, and
# git push over HTTPS via the url.insteadOf rewrite in git-init).
# SECURITY: this is the viktor-scoped admin PAT (write:package +
# repo) shared by Woodpecker — see secret/ci/global/forgejo_push_token.
# The shared claude-agent pod (all agents on it) can now push to
# and open PRs against any repo this token can reach.
secretKey = "FORGEJO_TOKEN"
remoteRef = {
key = "ci/global"
property = "forgejo_push_token"
}
},
{
secretKey = "API_BEARER_TOKEN"
remoteRef = {
key = "claude-agent-service"
property = "api_bearer_token"
}
},
{
# Long-lived OAuth token (1-year) from `claude setup-token`.
# Preferred over the short-lived .credentials.json — the CLI picks this up and
# skips the refresh flow entirely. Rotate yearly; alert 30d before expiry.
secretKey = "CLAUDE_CODE_OAUTH_TOKEN"
remoteRef = {
key = "claude-agent-service"
property = "claude_oauth_token"
}
},
{
# Consumed by service-upgrade agent to poll ci.viktorbarzin.me
# per-workflow status. Pod has no Vault CLI auth, so the old
# `vault kv get` path is dead — see bd code-3o3.
secretKey = "WOODPECKER_API_TOKEN"
remoteRef = {
key = "ci/global"
property = "woodpecker_api_token"
}
},
{
# Consumed by service-upgrade agent for Start/Success/Failure
# notifications. Same shared webhook as alertmanager.
secretKey = "SLACK_WEBHOOK_URL"
remoteRef = {
key = "viktor"
property = "alertmanager_slack_api_url"
}
},
{
# Home Assistant MCP endpoint (community ha-mcp add-on on ha-sofia).
# The URL embeds a secret path-token, so it ships as a secret, not a
# literal. Referenced as ${HA_MCP_URL} by the project-scoped .mcp.json
# in the infra repo root. Same Vault key OpenClaw uses
# (secret/openclaw -> ha_sofia_mcp_url).
secretKey = "HA_MCP_URL"
remoteRef = {
key = "openclaw"
property = "ha_sofia_mcp_url"
}
},
]
}
}
# local.namespace is a plain string, so the ordering after the namespace
# resource has to be declared explicitly.
depends_on = [kubernetes_namespace.claude_agent]
}
# SOPS age key for terraform state decryption
resource "kubernetes_secret" "sops_age_key" {
  # Opaque Secret whose single key, keys.txt, carries the age key read from
  # the "viktor" Vault entry.
  type = "Opaque"

  metadata {
    namespace = kubernetes_namespace.claude_agent.metadata[0].name
    name      = "sops-age-key"
  }

  data = {
    "keys.txt" = data.vault_kv_secret_v2.viktor_secrets.data["sops_age_key_devvm"]
  }
}
# Claude OAuth credentials (for claude -p)
resource "kubernetes_secret" "claude_credentials" {
  # Opaque Secret holding .credentials.json, read from the
  # "claude-agent-service" Vault entry.
  type = "Opaque"

  metadata {
    namespace = kubernetes_namespace.claude_agent.metadata[0].name
    name      = "claude-credentials"
  }

  data = {
    ".credentials.json" = data.vault_kv_secret_v2.secrets.data["claude_credentials_json"]
  }
}
# git-crypt key for repo decryption
resource "kubernetes_config_map" "git_crypt_key" {
# Ships the repository's git-crypt key (read from .git/git-crypt/keys/default
# two levels above the root module) as the binary entry "key".
# NOTE(review): this is key material stored in a ConfigMap rather than a
# Secret, so anything allowed to read ConfigMaps in this namespace can read
# it — consider moving it to a kubernetes_secret (the Deployment's volume
# definition would need to change with it).
metadata {
name = "git-crypt-key"
namespace = kubernetes_namespace.claude_agent.metadata[0].name
}
binary_data = {
# filebase64 reads the file at plan time; the file must exist locally.
"key" = filebase64("${path.root}/../../.git/git-crypt/keys/default")
}
}
# --- RBAC ---
resource "kubernetes_service_account" "claude_agent" {
  # Identity used by the Deployment and bound by both ClusterRoleBindings.
  metadata {
    namespace = kubernetes_namespace.claude_agent.metadata[0].name
    name      = "claude-agent"
  }
}
resource "kubernetes_cluster_role" "claude_agent" {
  metadata {
    name = "claude-agent"
  }

  # Read-only access to common workload and cluster objects.
  rule {
    api_groups = ["", "apps", "batch"]
    resources  = ["pods", "pods/log", "nodes", "events", "deployments", "services", "namespaces", "jobs", "cronjobs", "configmaps", "replicasets", "statefulsets", "daemonsets"]
    verbs      = ["get", "list", "watch"]
  }

  # Modify existing Deployments (no create/delete).
  rule {
    api_groups = ["apps"]
    resources  = ["deployments"]
    verbs      = ["patch", "update"]
  }

  # Open exec sessions into pods.
  rule {
    api_groups = [""]
    resources  = ["pods/exec"]
    verbs      = ["create"]
  }
}
resource "kubernetes_cluster_role_binding" "claude_agent" {
  # Binds the claude-agent ClusterRole to the claude-agent ServiceAccount,
  # cluster-wide.
  metadata {
    name = "claude-agent"
  }

  role_ref {
    kind      = "ClusterRole"
    name      = kubernetes_cluster_role.claude_agent.metadata[0].name
    api_group = "rbac.authorization.k8s.io"
  }

  subject {
    kind      = "ServiceAccount"
    namespace = kubernetes_namespace.claude_agent.metadata[0].name
    name      = kubernetes_service_account.claude_agent.metadata[0].name
  }
}
# -----------------------------------------------------------------------------
# claude-agent-exec — broad cluster WRITE for the executor agent
# -----------------------------------------------------------------------------
# Added 2026-06-04 for the nextcloud-todos-exec executor elevation. The
# existing `claude-agent` ClusterRole above stays as-is (read + patch
# deployments + pods/exec) — this is purely ADDITIVE.
#
# SECURITY — VERY BROAD, FLAG FOR REVIEW:
# - This grants the SHARED claude-agent pod cluster-wide
#   get/list/watch/create/update/patch/delete across the common API groups
#   below. EVERY agent that runs on this pod inherits it.
# - It explicitly includes core `secrets` (read+write, cluster-wide) and
#   rbac roles/rolebindings (create/update/delete) — i.e. the agent can
#   read any Secret in any namespace and grant itself further RBAC. That is
#   close to cluster-admin in blast radius, minus a few group wildcards.
# - It intentionally does NOT bind the built-in `cluster-admin` ClusterRole,
#   so it lacks: arbitrary CRDs/apiextensions, clusterroles/clusterrolebindings,
#   bind/escalate beyond what's listed, raw `*` on `*`. Viktor can widen to
#   `cluster-admin` by swapping the role_ref below if he decides the scoped
#   list is too restrictive.
# Terraform-managed cluster resources must still be changed via `scripts/tg
# apply` (CLAUDE.md Terraform-only rule) — this RBAC is for ad-hoc kubectl
# writes the exec agent needs, not a license to drift Terraform state.
resource "kubernetes_cluster_role" "claude_agent_exec" {
  # Every rule grants the same full read/write verb set; the rules differ only
  # in API group and resource list.
  metadata {
    name = "claude-agent-exec"
  }

  # Core API group, including secrets.
  rule {
    api_groups = [""]
    resources  = ["pods", "pods/log", "pods/exec", "services", "configmaps", "secrets", "persistentvolumeclaims", "serviceaccounts", "namespaces", "events", "endpoints"]
    verbs      = ["get", "list", "watch", "create", "update", "patch", "delete"]
  }

  # Workload controllers.
  rule {
    api_groups = ["apps"]
    resources  = ["deployments", "statefulsets", "daemonsets", "replicasets"]
    verbs      = ["get", "list", "watch", "create", "update", "patch", "delete"]
  }

  # Jobs and CronJobs.
  rule {
    api_groups = ["batch"]
    resources  = ["jobs", "cronjobs"]
    verbs      = ["get", "list", "watch", "create", "update", "patch", "delete"]
  }

  # Ingresses and NetworkPolicies.
  rule {
    api_groups = ["networking.k8s.io"]
    resources  = ["ingresses", "networkpolicies"]
    verbs      = ["get", "list", "watch", "create", "update", "patch", "delete"]
  }

  # Namespaced RBAC objects (not ClusterRoles/ClusterRoleBindings).
  rule {
    api_groups = ["rbac.authorization.k8s.io"]
    resources  = ["roles", "rolebindings"]
    verbs      = ["get", "list", "watch", "create", "update", "patch", "delete"]
  }
}
resource "kubernetes_cluster_role_binding" "claude_agent_exec" {
  # Binds the claude-agent-exec ClusterRole to the same claude-agent
  # ServiceAccount that already holds the claude-agent ClusterRole.
  metadata {
    name = "claude-agent-exec"
  }

  role_ref {
    kind      = "ClusterRole"
    name      = kubernetes_cluster_role.claude_agent_exec.metadata[0].name
    api_group = "rbac.authorization.k8s.io"
  }

  subject {
    kind      = "ServiceAccount"
    namespace = kubernetes_namespace.claude_agent.metadata[0].name
    name      = kubernetes_service_account.claude_agent.metadata[0].name
  }
}
# --- Storage ---
#
# The `workspace` volume in the deployment is intentionally emptyDir — agent
# jobs do fresh git clones each run, so a per-pod scratch dir on node disk
# is faster and isolated. The 10Gi `claude-agent-workspace-encrypted` PVC
# that previously sat next to this comment was created but never wired
# into the deployment (sat idle from 2026-04-15 to 2026-05-11).
#
# For cases where the agent DOES need to persist state across pod restarts
# (caches, ad-hoc outputs, anything that should survive a pod reschedule),
# `module.persistent` below provides a 5Gi NFS-backed RWX volume mounted
# at /persistent. Since the service now runs jobs concurrently (bounded
# semaphore, no single-flight lock), agents sharing /persistent must use
# per-job paths to avoid races — per-job *workspaces* are isolated (own
# clone under /workspace/jobs/<id>), but /persistent is shared.
module "persistent" {
  source = "../../modules/kubernetes/nfs_volume"

  # Claim identity.
  name      = "claude-agent-persistent"
  namespace = kubernetes_namespace.claude_agent.metadata[0].name
  storage   = "5Gi"

  # NFS export backing the claim.
  nfs_server = "192.168.1.127"
  nfs_path   = "/srv/nfs/claude-agent-persistent"
}
# --- Deployment ---
resource "kubernetes_deployment" "claude_agent" {
  # Single-replica Deployment of the claude-agent service. Pod layout:
  #   init:    fix-perms -> copy-claude-creds -> git-init -> seed-beads-agent
  #   runtime: claude-agent-service (uvicorn on :8080) + vault-token-refresher
  metadata {
    name      = "claude-agent-service"
    namespace = kubernetes_namespace.claude_agent.metadata[0].name
    labels    = local.labels
  }
  spec {
    replicas = 1
    strategy {
      type = "Recreate"
    }
    selector {
      match_labels = local.labels
    }
    template {
      metadata {
        labels = local.labels
      }
      spec {
        service_account_name = kubernetes_service_account.claude_agent.metadata[0].name
        image_pull_secrets {
          name = "registry-credentials"
        }
        security_context {
          run_as_user  = 1000
          run_as_group = 1000
          fs_group     = 1000
        }
        # Fix workspace ownership. Kubelet creates the Dockerfile WORKDIR
        # (/workspace/infra) inside the emptyDir as root:gid=fsGroup with
        # the setgid bit — uid 1000 can't write into it without explicit
        # chown + chmod. Pre-create so the path is guaranteed, then chown
        # recursively and chmod the infra subdir for safety.
        init_container {
          name    = "fix-perms"
          image   = "busybox:1.37"
          command = ["sh", "-c", "mkdir -p /workspace/infra /persistent && chown -R 1000:1000 /workspace /persistent && chmod 0775 /workspace/infra /persistent"]
          security_context {
            run_as_user = 0
          }
          volume_mount {
            name       = "workspace"
            mount_path = "/workspace"
          }
          volume_mount {
            name       = "persistent"
            mount_path = "/persistent"
          }
          resources {
            requests = {
              memory = "32Mi"
            }
            limits = {
              memory = "64Mi"
            }
          }
        }
        # Copy Claude credentials to writable volume (CLI needs to refresh OAuth tokens)
        init_container {
          name    = "copy-claude-creds"
          image   = "busybox:1.37"
          command = ["sh", "-c", "cp /secrets/claude/.credentials.json /home/agent/.claude/.credentials.json && chown 1000:1000 /home/agent/.claude/.credentials.json"]
          security_context {
            run_as_user = 0
          }
          volume_mount {
            name       = "claude-credentials-secret"
            mount_path = "/secrets/claude"
          }
          volume_mount {
            name       = "claude-home"
            mount_path = "/home/agent/.claude"
          }
          resources {
            requests = {
              memory = "32Mi"
            }
            limits = {
              memory = "64Mi"
            }
          }
        }
        # Init: clone repo + unlock git-crypt on first run.
        #
        # The second GitHub `insteadOf` line uses `--add`: both rewrites share
        # the same config key (url.<base>.insteadOf), and a plain
        # `git config <key> <value>` replaces the existing value, which would
        # silently drop the `git@github.com:` rewrite set on the line before.
        #
        # NOTE(review): `git config --global` writes to this init container's
        # own $HOME, and no volume is mounted there — presumably the runtime
        # container does not inherit these settings. Confirm the service
        # configures git itself if agents rely on the URL rewrites.
        init_container {
          name  = "git-init"
          image = "${local.image}:${local.image_tag}"
          command = ["sh", "-c", <<-EOF
            set -e
            # Configure git with HTTPS + PAT
            git config --global user.name "Claude Agent Service"
            git config --global user.email "claude-agent@viktorbarzin.me"
            git config --global --add safe.directory /workspace/infra
            git config --global url."https://$${GITHUB_TOKEN}@github.com/".insteadOf "git@github.com:"
            git config --global --add url."https://$${GITHUB_TOKEN}@github.com/".insteadOf "https://github.com/"
            # Authenticate git pushes/clones to Forgejo so the exec agent can
            # branch + push to open PRs on forgejo.viktorbarzin.me. The PR
            # itself is created via the Forgejo API (curl + $FORGEJO_TOKEN);
            # this rewrite only handles the git transport.
            if [ -n "$${FORGEJO_TOKEN}" ]; then
              git config --global url."https://$${FORGEJO_TOKEN}@forgejo.viktorbarzin.me/".insteadOf "https://forgejo.viktorbarzin.me/"
            fi
            # Clone or update repo
            if [ ! -d /workspace/infra/.git ]; then
              git clone https://$${GITHUB_TOKEN}@github.com/ViktorBarzin/infra.git /workspace/infra
            else
              cd /workspace/infra
              git fetch origin
              git reset --hard origin/master
            fi
            # Unlock git-crypt
            cd /workspace/infra
            git-crypt unlock /secrets/git-crypt/key || true
          EOF
          ]
          env_from {
            secret_ref {
              name = "claude-agent-secrets"
            }
          }
          volume_mount {
            name       = "workspace"
            mount_path = "/workspace"
          }
          volume_mount {
            name       = "git-crypt-key"
            mount_path = "/secrets/git-crypt"
          }
          resources {
            requests = {
              cpu    = "100m"
              memory = "256Mi"
            }
            limits = {
              memory = "512Mi"
            }
          }
        }
        # Seed beads metadata + agent definitions into runtime volumes.
        # The Dockerfile stages these files at /usr/share/agent-seed/ (image
        # layer, never mounted). Both /workspace and /home/agent/.claude are
        # emptyDir volume mounts that hide any image-layer content, so
        # the files have to be copied in at pod start. Also creates the
        # scratch directory the beads-task-runner rails expect.
        init_container {
          name  = "seed-beads-agent"
          image = "${local.image}:${local.image_tag}"
          command = ["sh", "-c", <<-EOT
            set -e
            mkdir -p /workspace/.beads /workspace/scratch /home/agent/.claude/agents
            cp /usr/share/agent-seed/beads-metadata.json /workspace/.beads/metadata.json
            cp /usr/share/agent-seed/beads-task-runner.md /home/agent/.claude/agents/beads-task-runner.md
            cp /usr/share/agent-seed/recruiter-triage.md /home/agent/.claude/agents/recruiter-triage.md
            cp /usr/share/agent-seed/nextcloud-todos-planner.md /home/agent/.claude/agents/nextcloud-todos-planner.md
            cp /usr/share/agent-seed/nextcloud-todos-exec.md /home/agent/.claude/agents/nextcloud-todos-exec.md
          EOT
          ]
          volume_mount {
            name       = "workspace"
            mount_path = "/workspace"
          }
          volume_mount {
            name       = "claude-home"
            mount_path = "/home/agent/.claude"
          }
          resources {
            requests = {
              memory = "32Mi"
            }
            limits = {
              memory = "64Mi"
            }
          }
        }
        container {
          name  = "claude-agent-service"
          image = "${local.image}:${local.image_tag}"
          # Wrap the image CMD so a Vault token is in place before any agent
          # runs `scripts/tg apply`. The `vault-token-refresher` sidecar
          # k8s-auth-logs-in (role=terraform-state) and writes the token to the
          # shared `vault-token` emptyDir at /vault/token; we symlink
          # $HOME/.vault-token -> that file so `vault` / `scripts/tg` (which fall
          # through to ~/.vault-token when $VAULT_TOKEN is unset) pick it up.
          # NOTE: this duplicates the image's CMD (uvicorn line below) — if the
          # Dockerfile CMD changes, update this too. FLAG for review.
          command = ["/bin/sh", "-c", <<-EOF
            ln -sfn /vault/token "$HOME/.vault-token"
            exec python3 -m uvicorn app.main:app --host 0.0.0.0 --port 8080 --app-dir /srv
          EOF
          ]
          port {
            container_port = 8080
          }
          env_from {
            secret_ref {
              name = "claude-agent-secrets"
            }
          }
          env {
            name  = "WORKSPACE_DIR"
            value = "/workspace/infra"
          }
          # Soft-unbounded concurrency: this caps simultaneous agent runs;
          # excess calls queue FIFO rather than 409/503. Each run peaks ~0.5-1.5Gi
          # (claude + terraform), so this and the memory limit are sized together.
          env {
            name  = "MAX_CONCURRENCY"
            value = "10"
          }
          # Vault — so `scripts/tg apply` can fetch the Tier-1 PG backend
          # password + the broadened app-secret reads. The CLI + scripts/tg
          # fall through to $HOME/.vault-token (symlinked above) when
          # $VAULT_TOKEN is unset; VAULT_K8S_ROLE tells the refresher which
          # role to log in as.
          env {
            name  = "VAULT_ADDR"
            value = "http://vault-active.vault.svc.cluster.local:8200"
          }
          env {
            name  = "VAULT_K8S_ROLE"
            value = "terraform-state"
          }
          # NOTE on MCP: the HA MCP URL (secret — its path segment is the auth
          # token) arrives as env `HA_MCP_URL` via the claude-agent-secrets
          # ExternalSecret (env_from above), sourced from Vault
          # secret/openclaw -> ha_sofia_mcp_url. The project-scoped .mcp.json
          # in the infra repo root references it as ${HA_MCP_URL}. Paperless
          # MCP needs no token in-cluster (bearer is enforced only at the
          # Traefik ingress), so its in-cluster Service URL is a plain literal
          # in .mcp.json.
          liveness_probe {
            http_get {
              path = "/health"
              port = 8080
            }
            initial_delay_seconds = 10
            period_seconds        = 30
          }
          readiness_probe {
            http_get {
              path = "/health"
              port = 8080
            }
            initial_delay_seconds = 5
            period_seconds        = 10
          }
          volume_mount {
            name       = "workspace"
            mount_path = "/workspace"
          }
          volume_mount {
            name       = "persistent"
            mount_path = "/persistent"
          }
          volume_mount {
            name       = "sops-age-key"
            mount_path = "/home/agent/.config/sops/age"
          }
          volume_mount {
            name       = "claude-home"
            mount_path = "/home/agent/.claude"
          }
          # git-crypt key — each job re-unlocks its own clone, so the runtime
          # container (not just the git-init init container) needs the key.
          volume_mount {
            name       = "git-crypt-key"
            mount_path = "/secrets/git-crypt"
          }
          # Shared Vault token written by the vault-token-refresher sidecar.
          # Symlinked to $HOME/.vault-token by the container command above.
          volume_mount {
            name       = "vault-token"
            mount_path = "/vault"
          }
          # Burstable (tier-aux). Sized for ~10 concurrent agent runs at
          # ~0.5-1.5Gi each (see MAX_CONCURRENCY). No CPU limit per cluster
          # policy (CFS throttling); request only.
          resources {
            requests = {
              cpu    = "1"
              memory = "2Gi"
            }
            limits = {
              memory = "12Gi"
            }
          }
        }
        # Sidecar: keep a fresh Vault token on disk for `scripts/tg apply`.
        # k8s-auth login (role=terraform-state) every 30 min — well inside the
        # 6-day token TTL — and write it to the shared `vault-token` emptyDir.
        # The main container symlinks $HOME/.vault-token at it. Mirrors the
        # estate k8s-auth-login pattern (infra/.woodpecker/default.yml "Vault
        # auth" step, woodpecker vault-sync sidecar).
        container {
          name  = "vault-token-refresher"
          image = "docker.io/curlimages/curl:8.11.0"
          # No `set -e`: a transient Vault blip must NOT kill the refresh loop
          # (the stale token keeps working until its 6d TTL). curlimages/curl
          # is Alpine/busybox — has `sed`, no `jq`, so parse client_token with
          # sed. umask 077 so the token file is 0600.
          command = ["/bin/sh", "-c", <<-EOF
            umask 077
            while true; do
              SA_TOKEN=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)
              TOKEN=$(curl -s -X POST "$VAULT_ADDR/v1/auth/kubernetes/login" \
                -d "{\"role\":\"$VAULT_K8S_ROLE\",\"jwt\":\"$SA_TOKEN\"}" \
                | sed -n 's/.*"client_token":"\([^"]*\)".*/\1/p')
              if [ -n "$TOKEN" ]; then
                printf '%s' "$TOKEN" > /vault/token
                echo "$(date -u +%FT%TZ) refreshed vault token (role=$VAULT_K8S_ROLE)"
              else
                echo "$(date -u +%FT%TZ) ERROR: vault k8s login failed (role=$VAULT_K8S_ROLE)" >&2
              fi
              sleep 1800
            done
          EOF
          ]
          env {
            name  = "VAULT_ADDR"
            value = "http://vault-active.vault.svc.cluster.local:8200"
          }
          env {
            name  = "VAULT_K8S_ROLE"
            value = "terraform-state"
          }
          volume_mount {
            name       = "vault-token"
            mount_path = "/vault"
          }
          resources {
            requests = { cpu = "5m", memory = "16Mi" }
            limits   = { memory = "32Mi" }
          }
        }
        volume {
          name = "workspace"
          # Per-pod ephemeral scratch — agent does fresh git clones each
          # job, so node-disk emptyDir is faster than a network-backed PVC
          # and needs no RWO volume attachment.
          empty_dir {}
        }
        volume {
          name = "persistent"
          persistent_volume_claim {
            claim_name = module.persistent.claim_name
          }
        }
        volume {
          name = "sops-age-key"
          secret {
            secret_name  = kubernetes_secret.sops_age_key.metadata[0].name
            default_mode = "0600"
          }
        }
        volume {
          name = "git-crypt-key"
          config_map {
            name = kubernetes_config_map.git_crypt_key.metadata[0].name
          }
        }
        volume {
          name = "claude-credentials-secret"
          secret {
            secret_name  = kubernetes_secret.claude_credentials.metadata[0].name
            default_mode = "0600"
          }
        }
        volume {
          name = "claude-home"
          empty_dir {}
        }
        # Holds the Vault token the refresher sidecar mints; main container
        # symlinks $HOME/.vault-token at /vault/token. emptyDir (memory-backed
        # not required) — token is re-minted every 30 min and on pod restart.
        volume {
          name = "vault-token"
          empty_dir {}
        }
      }
    }
  }
  lifecycle {
    ignore_changes = [spec[0].template[0].spec[0].dns_config] # KYVERNO_LIFECYCLE_V1
  }
}
# --- Service ---
resource "kubernetes_service" "claude_agent" {
  # ClusterIP Service in front of the pods labelled local.labels, port 8080.
  metadata {
    namespace = kubernetes_namespace.claude_agent.metadata[0].name
    name      = "claude-agent-service"
    labels    = local.labels
  }

  spec {
    type     = "ClusterIP"
    selector = local.labels

    port {
      target_port = 8080
      port        = 8080
    }
  }
}
# =============================================================================
# Token expiry monitor
# Long-lived CLAUDE_CODE_OAUTH_TOKEN values expire 1y after mint. We track
# mint timestamps here — on rotation, update the map below. A CronJob pushes
# the computed expiry_timestamp to Pushgateway; Prometheus alerts 30d out.
# =============================================================================
locals {
  # Lifetime of a minted token: 365 days, expressed in seconds.
  claude_oauth_token_ttl_seconds = 365 * 24 * 60 * 60

  # Token slot => unix seconds (UTC) at which `claude setup-token` finished
  # minting the token currently stored in that slot.
  #
  # NOTE(review): 1776528429 decodes to 2026-04-18T16:07:09Z and 1776528280 to
  # 2026-04-18T16:04:40Z, i.e. four hours later than the times annotated
  # below. Confirm whether the epochs or the annotations are the ones that are
  # off (the annotations may be local time rather than UTC).
  claude_oauth_token_mint_epochs = {
    "primary" = 1776528429 # 2026-04-18T12:07:09Z (TOKEN2)
    "spare-1" = 1776528280 # 2026-04-18T12:04:40Z (TOKEN1)
    "spare-2" = 1776528429 # 2026-04-18T12:07:09Z (TOKEN2 — redundant w/ primary)
  }
}
# One key per token slot; each value is that slot's expiry as a unix epoch
# string (mint epoch + TTL). Mounted by the expiry-monitor CronJob.
resource "kubernetes_config_map" "claude_oauth_expiry" {
  metadata {
    namespace = kubernetes_namespace.claude_agent.metadata[0].name
    name      = "claude-oauth-expiry"
  }

  data = {
    for slot, minted_at in local.claude_oauth_token_mint_epochs :
    slot => tostring(minted_at + local.claude_oauth_token_ttl_seconds)
  }
}
# Every six hours, reads each file of the mounted claude-oauth-expiry
# ConfigMap (file name = token slot, content = expiry epoch) and pushes the
# values to Pushgateway as claude_oauth_token_expiry_timestamp{path="<slot>"},
# together with a last-push heartbeat gauge.
resource "kubernetes_cron_job_v1" "claude_oauth_expiry_monitor" {
  metadata {
    name      = "claude-oauth-expiry-monitor"
    namespace = kubernetes_namespace.claude_agent.metadata[0].name
  }
  spec {
    concurrency_policy            = "Replace"
    failed_jobs_history_limit     = 3
    successful_jobs_history_limit = 1
    schedule                      = "17 */6 * * *" # every 6h at :17 past
    job_template {
      metadata {}
      spec {
        backoff_limit              = 1
        ttl_seconds_after_finished = 300
        template {
          metadata {}
          spec {
            restart_policy = "OnFailure"
            container {
              name  = "push-expiry"
              image = "docker.io/curlimages/curl:8.11.0"
              # Script notes:
              #  * "$$" followed by "{" is the Terraform escape for a literal
              #    shell parameter expansion.
              #  * Each PAYLOAD line is terminated by a literal newline inside
              #    the double quotes, so the closing quote must stay at the
              #    heredoc's base indentation.
              #  * curl runs with --fail-with-body: without it an HTTP 4xx/5xx
              #    from Pushgateway still exits 0, `set -e` never fires and the
              #    job would log "pushed" although nothing was stored.
              #    --max-time bounds a hung connection.
              command = ["/bin/sh", "-c", <<-EOT
                set -e
                PG='http://prometheus-prometheus-pushgateway.monitoring:9091/metrics/job/claude-oauth-expiry-monitor'
                NOW=$(date +%s)
                PAYLOAD=''
                PAYLOAD="$${PAYLOAD}# HELP claude_oauth_token_expiry_timestamp Unix epoch when the CLAUDE_CODE_OAUTH_TOKEN for this path expires
                "
                PAYLOAD="$${PAYLOAD}# TYPE claude_oauth_token_expiry_timestamp gauge
                "
                for path in /mnt/expiry/*; do
                name=$(basename "$path")
                exp=$(cat "$path")
                PAYLOAD="$${PAYLOAD}claude_oauth_token_expiry_timestamp{path=\"$name\"} $exp
                "
                done
                PAYLOAD="$${PAYLOAD}# HELP claude_oauth_expiry_monitor_last_push_timestamp Last time the expiry monitor pushed metrics
                "
                PAYLOAD="$${PAYLOAD}# TYPE claude_oauth_expiry_monitor_last_push_timestamp gauge
                "
                PAYLOAD="$${PAYLOAD}claude_oauth_expiry_monitor_last_push_timestamp $NOW
                "
                echo "$PAYLOAD"
                echo "$PAYLOAD" | curl -sS --fail-with-body --max-time 30 --data-binary @- "$PG"
                echo "pushed at $NOW"
                EOT
              ]
              volume_mount {
                name       = "expiry"
                mount_path = "/mnt/expiry"
              }
              resources {
                requests = { cpu = "10m", memory = "32Mi" }
                limits   = { memory = "64Mi" }
              }
            }
            volume {
              name = "expiry"
              config_map {
                name = kubernetes_config_map.claude_oauth_expiry.metadata[0].name
              }
            }
          }
        }
      }
    }
  }
  lifecycle {
    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
    ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
  }
}

View file

@ -0,0 +1,18 @@
# Pull in the shared Terragrunt configuration located in a parent folder.
include "root" {
  path = find_in_parent_folders()
}

# The stacks below are declared as dependencies without reading their outputs
# (skip_outputs = true).
dependency "platform" {
  skip_outputs = true
  config_path  = "../platform"
}

dependency "vault" {
  skip_outputs = true
  config_path  = "../vault"
}

dependency "external-secrets" {
  skip_outputs = true
  config_path  = "../external-secrets"
}

View file

@ -0,0 +1,305 @@
# Name of the TLS secret handed to the tls_secret and ingress modules.
variable "tls_secret_name" {
  sensitive = true
  type      = string
}

# PostgreSQL host used by the db-init Job and the rendered DATABASE_URL.
variable "postgresql_host" {
  type = string
}

# Optional override for the initial claude_memory role password.
variable "claude_memory_db_password" {
  sensitive = true
  type      = string
  default   = "" # falls back to Vault `secret/claude-memory.db_password` below
}

# KV v2 secret `secret/claude-memory`; the db-init Job reads
# dbaas_root_password and db_password from it.
data "vault_kv_secret_v2" "secrets" {
  name  = "claude-memory"
  mount = "secret"
}
# Namespace for the claude-memory stack (aux tier, enrolled with Keel).
resource "kubernetes_namespace" "claude-memory" {
  metadata {
    name = "claude-memory"
    labels = {
      "keel.sh/enrolled" = "true"
      tier               = local.tiers.aux
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
    ignore_changes = [
      metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"],
    ]
  }
}
# ExternalSecret that extracts every key of the `claude-memory` entry from the
# vault-kv ClusterSecretStore into the `claude-memory-secrets` Secret,
# refreshed every 15 minutes.
resource "kubernetes_manifest" "external_secret" {
  depends_on = [kubernetes_namespace.claude-memory]

  manifest = {
    apiVersion = "external-secrets.io/v1beta1"
    kind       = "ExternalSecret"

    metadata = {
      namespace = "claude-memory"
      name      = "claude-memory-secrets"
    }

    spec = {
      refreshInterval = "15m"

      secretStoreRef = {
        kind = "ClusterSecretStore"
        name = "vault-kv"
      }

      target = {
        name = "claude-memory-secrets"
      }

      dataFrom = [
        {
          extract = {
            key = "claude-memory"
          }
        },
      ]
    }
  }
}
# DB credentials from the Vault database engine (rotated every 24h).
# Reads `password` from static-creds/pg-claude-memory via the vault-database
# ClusterSecretStore and templates it into the `claude-memory-db-creds` Secret
# as DATABASE_URL and DB_PASSWORD.
resource "kubernetes_manifest" "db_external_secret" {
  depends_on = [kubernetes_namespace.claude-memory]

  manifest = {
    apiVersion = "external-secrets.io/v1beta1"
    kind       = "ExternalSecret"

    metadata = {
      namespace = "claude-memory"
      name      = "claude-memory-db-creds"
    }

    spec = {
      refreshInterval = "15m"

      secretStoreRef = {
        kind = "ClusterSecretStore"
        name = "vault-database"
      }

      # Source value: exposed to the template below as `.password`.
      data = [
        {
          secretKey = "password"
          remoteRef = {
            key      = "static-creds/pg-claude-memory"
            property = "password"
          }
        },
      ]

      target = {
        name = "claude-memory-db-creds"
        template = {
          data = {
            DB_PASSWORD  = "{{ .password }}"
            DATABASE_URL = "postgresql://claude_memory:{{ .password }}@${var.postgresql_host}:5432/claude_memory"
          }
        }
      }
    }
  }
}
# Sets up the TLS secret in the claude-memory namespace via the shared module.
module "tls_secret" {
  source = "../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.claude-memory.metadata[0].name
}
# Database init job.
# Idempotently creates the claude_memory role and database (each CREATE runs
# only when the preceding SELECT finds nothing) and grants privileges.
# Terraform waits up to 2m for the Job to complete.
#
# NOTE(review): the root password and the initial role password are rendered
# into the container command, so they are readable from the Job/Pod spec;
# confirm that is acceptable for this cluster.
locals {
  # Common psql invocation: root credentials, target host and the explicit
  # `postgres` database.
  claude_memory_db_init_psql = "PGPASSWORD='${data.vault_kv_secret_v2.secrets.data["dbaas_root_password"]}' psql -h ${var.postgresql_host} -U root -d postgres"

  # Initial role password: the variable override when non-empty, otherwise the
  # Vault value.
  claude_memory_db_init_role_password = coalesce(var.claude_memory_db_password, data.vault_kv_secret_v2.secrets.data["db_password"])
}

resource "kubernetes_job" "db_init" {
  wait_for_completion = true

  timeouts {
    create = "2m"
  }

  metadata {
    namespace = kubernetes_namespace.claude-memory.metadata[0].name
    name      = "claude-memory-db-init"
  }

  spec {
    backoff_limit = 3

    template {
      metadata {}
      spec {
        restart_policy = "Never"

        container {
          image = "postgres:16-alpine"
          name  = "db-init"
          command = [
            "sh", "-c",
            <<-EOT
            set -e
            # -d postgres: psql defaults database name to username; root user
            # doesn't have a root-named database, so be explicit.
            ${local.claude_memory_db_init_psql} -tc "SELECT 1 FROM pg_roles WHERE rolname='claude_memory'" | grep -q 1 || \
            ${local.claude_memory_db_init_psql} -c "CREATE ROLE claude_memory WITH LOGIN PASSWORD '${local.claude_memory_db_init_role_password}'"
            ${local.claude_memory_db_init_psql} -tc "SELECT 1 FROM pg_database WHERE datname='claude_memory'" | grep -q 1 || \
            ${local.claude_memory_db_init_psql} -c "CREATE DATABASE claude_memory OWNER claude_memory"
            ${local.claude_memory_db_init_psql} -c "GRANT ALL PRIVILEGES ON DATABASE claude_memory TO claude_memory"
            echo "Database init complete"
            EOT
          ]
        }
      }
    }
  }
}
# claude-memory MCP server: single replica listening on container port 8000.
# Created only after the db-init Job resource has been applied.
resource "kubernetes_deployment" "claude-memory" {
  depends_on = [kubernetes_job.db_init]

  metadata {
    namespace = kubernetes_namespace.claude-memory.metadata[0].name
    name      = "claude-memory"

    annotations = {
      "reloader.stakater.com/auto" = "true"
    }

    labels = {
      tier = local.tiers.aux
      app  = "claude-memory"
    }
  }

  spec {
    replicas = 1

    selector {
      match_labels = {
        app = "claude-memory"
      }
    }

    template {
      metadata {
        annotations = {
          "dependency.kyverno.io/wait-for" = "postgresql.dbaas:5432"
        }
        labels = {
          app = "claude-memory"
        }
      }

      spec {
        # Hard anti-affinity: never co-locate two claude-memory pods on the
        # same node.
        affinity {
          pod_anti_affinity {
            required_during_scheduling_ignored_during_execution {
              topology_key = "kubernetes.io/hostname"
              label_selector {
                match_labels = {
                  app = "claude-memory"
                }
              }
            }
          }
        }

        container {
          name = "claude-memory"
          # Phase 3 cutover 2026-05-07 — moved off DockerHub to Forgejo as
          # part of the registry consolidation. Old: viktorbarzin/claude-memory-mcp:17
          image = "forgejo.viktorbarzin.me/viktor/claude-memory-mcp:17"

          port {
            container_port = 8000
          }

          # Connection string rendered by the claude-memory-db-creds ExternalSecret.
          env {
            name = "DATABASE_URL"
            value_from {
              secret_key_ref {
                key  = "DATABASE_URL"
                name = "claude-memory-db-creds"
              }
            }
          }

          env {
            name = "API_KEYS"
            value_from {
              secret_key_ref {
                key  = "api_keys"
                name = "claude-memory-secrets"
              }
            }
          }

          resources {
            limits = {
              memory = "128Mi"
            }
            requests = {
              cpu    = "10m"
              memory = "128Mi"
            }
          }

          # All three probes hit GET /health on port 8000. The startup probe
          # allows 30 failures at a 2s period.
          startup_probe {
            period_seconds    = 2
            failure_threshold = 30
            http_get {
              port = 8000
              path = "/health"
            }
          }

          liveness_probe {
            period_seconds        = 30
            initial_delay_seconds = 5
            http_get {
              port = 8000
              path = "/health"
            }
          }

          readiness_probe {
            period_seconds        = 10
            initial_delay_seconds = 3
            http_get {
              port = 8000
              path = "/health"
            }
          }
        }
      }
    }
  }

  lifecycle {
    # DRIFT_WORKAROUND: CI pipeline owns image tag (kubectl set image from Woodpecker/GHA). Reviewed 2026-04-18.
    ignore_changes = [
      spec[0].template[0].spec[0].container[0].image,
      spec[0].template[0].spec[0].dns_config, # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
      metadata[0].annotations["keel.sh/policy"],
      metadata[0].annotations["keel.sh/trigger"],
      metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2
      metadata[0].annotations["keel.sh/match-tag"],
      metadata[0].annotations["kubernetes.io/change-cause"],
      metadata[0].annotations["deployment.kubernetes.io/revision"],
      spec[0].template[0].metadata[0].annotations["keel.sh/update-time"], # KEEL_LIFECYCLE_V1
    ]
  }
}
# PDB removed — a single replica with minAvailable=1 blocks all node drains.
# claude-memory is non-critical and recovers quickly after rescheduling.
# Service for the claude-memory pods: port 80 ("http") forwards to container
# port 8000.
resource "kubernetes_service" "claude-memory" {
  metadata {
    namespace = kubernetes_namespace.claude-memory.metadata[0].name
    name      = "claude-memory"
    labels = {
      app = "claude-memory"
    }
  }

  spec {
    selector = {
      app = "claude-memory"
    }

    port {
      name        = "http"
      target_port = 8000
      port        = 80
    }
  }
}
# Public ingress for claude-memory plus its homepage dashboard annotations.
module "ingress" {
  source = "../../modules/kubernetes/ingress_factory"

  name            = "claude-memory"
  namespace       = kubernetes_namespace.claude-memory.metadata[0].name
  tls_secret_name = var.tls_secret_name
  dns_type        = "proxied"

  # auth = "none": the MCP server is called by Claude Code (and other
  # tools/agents) with app-layer bearer-token auth; forward-auth would break
  # programmatic clients.
  auth = "none"

  extra_annotations = {
    "gethomepage.dev/enabled"      = "true"
    "gethomepage.dev/group"        = "Core Platform"
    "gethomepage.dev/name"         = "Claude Memory"
    "gethomepage.dev/description"  = "Shared persistent memory for Claude sessions"
    "gethomepage.dev/icon"         = "claude-ai.png"
    "gethomepage.dev/pod-selector" = ""
  }
}

View file

@ -0,0 +1,37 @@
# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa
# NOTE(review): generated file — presumably rewritten by Terragrunt, so lasting
# changes belong in the generating configuration; confirm before editing here.
# Version constraints for the vault, cloudflare and authentik providers.
terraform {
required_providers {
vault = {
source = "hashicorp/vault"
version = "~> 4.0"
}
cloudflare = {
source = "cloudflare/cloudflare"
version = "~> 4"
}
authentik = {
source = "goauthentik/authentik"
version = "~> 2024.10"
}
}
}
# Kubeconfig used by both the kubernetes and helm providers below.
variable "kube_config_path" {
type = string
default = "~/.kube/config"
}
provider "kubernetes" {
config_path = var.kube_config_path
}
# helm talks to the same cluster: it reuses var.kube_config_path.
provider "helm" {
kubernetes = {
config_path = var.kube_config_path
}
}
provider "vault" {
address = "https://vault.viktorbarzin.me"
skip_child_token = true
}

View file

@ -0,0 +1 @@
../../secrets

View file

@ -0,0 +1,13 @@
# Pull in the shared Terragrunt configuration located in a parent folder.
include "root" {
  path = find_in_parent_folders()
}

# Declared as dependencies without reading their outputs (skip_outputs = true).
dependency "platform" {
  skip_outputs = true
  config_path  = "../platform"
}

dependency "vault" {
  skip_outputs = true
  config_path  = "../vault"
}

View file

@ -0,0 +1,44 @@
# =============================================================================
# Cloudflared Stack — Cloudflare tunnel + DNS records
# =============================================================================
# Stack inputs; all of them are passed through to module.cloudflared.
variable "tls_secret_name" {
  type = string
}

variable "cloudflare_email" {
  type = string
}

variable "cloudflare_account_id" {
  type = string
}

variable "cloudflare_zone_id" {
  type = string
}

variable "cloudflare_tunnel_id" {
  type = string
}

variable "public_ip" {
  type = string
}

variable "public_ipv6" {
  type = string
}

# Hostname lists (untyped here; the module declares them as list(string)).
variable "cloudflare_proxied_names" {
}

variable "cloudflare_non_proxied_names" {
}
# KV v2 secret `secret/platform`: holds k8s_users (JSON) and the Cloudflare
# credentials read below and in module.cloudflared.
data "vault_kv_secret_v2" "secrets" {
  name  = "platform"
  mount = "secret"
}

locals {
  # Decoded k8s_users JSON document.
  k8s_users = jsondecode(data.vault_kv_secret_v2.secrets.data["k8s_users"])

  # Flat list of every domain declared by a user whose role is
  # "namespace-owner"; users without a `domains` key contribute nothing.
  user_domains = flatten([
    for user in local.k8s_users :
    lookup(user, "domains", [])
    if user.role == "namespace-owner"
  ])
}
# Tunnel deployment + Cloudflare zone/DNS configuration.
module "cloudflared" {
  source = "./modules/cloudflared"

  tier            = local.tiers.core
  tls_secret_name = var.tls_secret_name

  # Credentials come from Vault (secret/platform).
  cloudflare_api_key      = data.vault_kv_secret_v2.secrets.data["cloudflare_api_key"]
  cloudflare_tunnel_token = data.vault_kv_secret_v2.secrets.data["cloudflare_tunnel_token"]

  cloudflare_email      = var.cloudflare_email
  cloudflare_account_id = var.cloudflare_account_id
  cloudflare_zone_id    = var.cloudflare_zone_id
  cloudflare_tunnel_id  = var.cloudflare_tunnel_id

  public_ip   = var.public_ip
  public_ipv6 = var.public_ipv6

  # Proxied names = the static list plus the per-user domains from Vault; the
  # latter are unwrapped with nonsensitive() before being concatenated.
  cloudflare_proxied_names     = concat(var.cloudflare_proxied_names, nonsensitive(local.user_domains))
  cloudflare_non_proxied_names = var.cloudflare_non_proxied_names
}

View file

@ -0,0 +1,245 @@
# Contents for cloudflare account
# API key the cloudflare provider authenticates with (paired with
# cloudflare_email). It is a credential, so it is typed and marked sensitive
# like the account and tunnel IDs below, keeping it out of plan output.
variable "cloudflare_api_key" {
  type      = string
  sensitive = true
}

# Account e-mail used together with cloudflare_api_key by the provider block.
variable "cloudflare_email" {
  type = string
}

# Hostnames that get a proxied CNAME pointing at the tunnel.
variable "cloudflare_proxied_names" { type = list(string) }

# Hostnames that get unproxied A/AAAA records pointing at public_ip/public_ipv6.
variable "cloudflare_non_proxied_names" { type = list(string) }

variable "cloudflare_zone_id" {
  description = "Zone ID for your domain"
  type        = string
}

variable "cloudflare_account_id" {
  type      = string
  sensitive = true
}

variable "cloudflare_tunnel_id" {
  type      = string
  sensitive = true
}

# Content of the unproxied A records.
variable "public_ip" {
  type = string
}

variable "public_ipv6" {
  type        = string
  description = "Public IPv6 address for AAAA records (from HE tunnel broker)"
}
# The module pins and configures its own cloudflare provider.
terraform {
  required_providers {
    cloudflare = {
      version = "~> 4"
      source  = "cloudflare/cloudflare"
    }
  }
}

# Authenticates with e-mail + API key rather than a scoped API token.
provider "cloudflare" {
  email   = var.cloudflare_email
  api_key = var.cloudflare_api_key # I gave up on getting the permissions on the token...
}
# Hostname lists turned into hostname => hostname maps so the record resources
# can for_each over them.
locals {
  cloudflare_proxied_names_map = {
    for hostname in var.cloudflare_proxied_names : hostname => hostname
  }

  cloudflare_non_proxied_names_map = {
    for hostname in var.cloudflare_non_proxied_names : hostname => hostname
  }
}
# Zone-level Bot Management.
#
# ai_bots_protection used to be "block": Cloudflare answered declared AI-bot
# user agents with 403 at the edge, so the in-cluster x402 gateway never got a
# chance to issue HTTP 402 with a payment offer. It is now "disabled" so AI
# bots reach Traefik -> x402, which returns 402 with the wallet address.
# Generic Bot Fight Mode + crawler protection stay on.
# (The import {} stanza for adoption lives in the root stack — a Terraform
# restriction.)
resource "cloudflare_bot_management" "zone" {
  zone_id = var.cloudflare_zone_id

  ai_bots_protection = "disabled"
  fight_mode         = true
  enable_js          = true

  # crawler_protection / is_robots_txt_managed are settable only via newer
  # provider versions; they retain whatever the API currently has
  # (crawler_protection=enabled, is_robots_txt_managed=true).
}
# Ingress configuration of the tunnel. The rules appear in this order:
# wildcard subdomains, then the apex, then a catch-all 404.
resource "cloudflare_zero_trust_tunnel_cloudflared_config" "sof" {
  tunnel_id  = var.cloudflare_tunnel_id
  account_id = var.cloudflare_account_id

  config {
    warp_routing {
      enabled = true
    }

    # The wildcard rule sends every subdomain through the tunnel to Traefik,
    # which does the host-based routing via K8s Ingress resources.
    # The origin is the in-cluster Traefik Service DNS name (NOT a MetalLB LB
    # IP), so the tunnel is decoupled from LB-IP changes. A raw IP here caused
    # a full-site 502 on 2026-06-01 when Traefik moved 10.0.20.200 -> .203; see
    # docs/post-mortems/2026-06-01-cloudflared-stale-traefik-origin.md.
    ingress_rule {
      service  = "https://traefik.traefik.svc.cluster.local:443"
      hostname = "*.viktorbarzin.me"
      origin_request {
        no_tls_verify = true
      }
    }

    # Apex domain: same origin as the wildcard rule.
    ingress_rule {
      service  = "https://traefik.traefik.svc.cluster.local:443"
      hostname = "viktorbarzin.me"
      origin_request {
        no_tls_verify = true
      }
    }

    # Catch-all rule without a hostname.
    ingress_rule {
      service = "http_status:404"
    }
  }
}
# Proxied hostnames: one CNAME per name, pointing at the tunnel's
# <tunnel-id>.cfargotunnel.com endpoint. Keyed by hostname (for_each) instead
# of the earlier count/index form.
resource "cloudflare_record" "dns_record" {
  for_each = local.cloudflare_proxied_names_map

  zone_id = var.cloudflare_zone_id
  type    = "CNAME"
  name    = each.key
  content = "${var.cloudflare_tunnel_id}.cfargotunnel.com"
  ttl     = 1
  proxied = true
}

# Unproxied hostnames: A record with var.public_ip as content.
resource "cloudflare_record" "non_proxied_dns_record" {
  for_each = local.cloudflare_non_proxied_names_map

  zone_id = var.cloudflare_zone_id
  type    = "A"
  name    = each.key
  content = var.public_ip
  ttl     = 1
  proxied = false
}

# Unproxied hostnames: matching AAAA record with var.public_ipv6 as content.
resource "cloudflare_record" "non_proxied_dns_record_ipv6" {
  for_each = local.cloudflare_non_proxied_names_map

  zone_id = var.cloudflare_zone_id
  type    = "AAAA"
  name    = each.key
  content = var.public_ipv6
  ttl     = 1
  proxied = false
}
# ---- Mail-related DNS records (all unproxied, ttl = 1) ----------------------

# MX for the apex, pointing at mail.viktorbarzin.me.
resource "cloudflare_record" "mail_mx" {
  zone_id  = var.cloudflare_zone_id
  type     = "MX"
  name     = "viktorbarzin.me"
  content  = "mail.viktorbarzin.me"
  priority = 1
  ttl      = 1
  proxied  = false
}

# SPF.
# Brevo replaced Mailgun as the outbound relay on 2026-04-12 (see docs/architecture/mailserver.md).
# Soft-fail (~all) is intentional during cutover — revisit once relay delivery is stable.
resource "cloudflare_record" "mail_spf" {
  zone_id  = var.cloudflare_zone_id
  type     = "TXT"
  name     = "viktorbarzin.me"
  content  = "\"v=spf1 include:spf.brevo.com ~all\""
  priority = 1
  ttl      = 1
  proxied  = false
}

# DKIM public key published under the `mail` selector.
resource "cloudflare_record" "mail_domainkey_rspamd" {
  zone_id = var.cloudflare_zone_id
  type    = "TXT"
  name    = "mail._domainkey.viktorbarzin.me"
  content = "\"v=DKIM1; h=sha256; k=rsa; p=MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAs9XHeFBKhUAEJSikXx+P49Q3nEBbnaSpn6h/9TqIhKaZWSVa2uGUGYQieNdon7DEJZ0VFo0Tvm3/UFsy2qF7ZmF+E/+N8EmkcPrMlxgJT281dpk5DxrZ+kbzw/DosfHH71K6vCLB4rSexzxJHaAx0AUddI3bFUJGjMgCXXCMZF+p8YCx+DDGPIXz2FOTtlJlR7aeZ2xXavwE/lBfI3MLnsq7X+GhPjQEax070nndOdZI0S8HpZkVxdGWl1N2Ec6LukYm2RiUkEMMQHSYX7WF3JBc+CGqUyd706Iy/5oeC3UGwZSM2uLkrp8YBjmw/h1rAeyv/ITt6ZXraP/cIMRiVQIDAQAB\""
  ttl     = 1
  proxied = false
}

# Brevo DKIM selectors (CNAMEs into brevo.com).
resource "cloudflare_record" "brevo_domainkey1" {
  zone_id = var.cloudflare_zone_id
  type    = "CNAME"
  name    = "brevo1._domainkey.viktorbarzin.me"
  content = "b1.viktorbarzin-me.dkim.brevo.com."
  ttl     = 1
  proxied = false
}

resource "cloudflare_record" "brevo_domainkey2" {
  zone_id = var.cloudflare_zone_id
  type    = "CNAME"
  name    = "brevo2._domainkey.viktorbarzin.me"
  content = "b2.viktorbarzin-me.dkim.brevo.com."
  ttl     = 1
  proxied = false
}

# Brevo verification code (TXT on the apex).
resource "cloudflare_record" "brevo_code" {
  zone_id = var.cloudflare_zone_id
  type    = "TXT"
  name    = "viktorbarzin.me"
  content = "\"brevo-code:a6ef1dd91b248559900246eb4e7ceebd\""
  ttl     = 1
  proxied = false
}

# MTA-STS policy id.
resource "cloudflare_record" "mail_mta_sts" {
  zone_id = var.cloudflare_zone_id
  type    = "TXT"
  name    = "_mta-sts.viktorbarzin.me"
  content = "\"v=STSv1; id=20260412\""
  ttl     = 1
  proxied = false
}

# SMTP TLS reporting address.
resource "cloudflare_record" "mail_tlsrpt" {
  zone_id = var.cloudflare_zone_id
  type    = "TXT"
  name    = "_smtp._tls.viktorbarzin.me"
  content = "\"v=TLSRPTv1; rua=mailto:postmaster@viktorbarzin.me\""
  ttl     = 1
  proxied = false
}

# DMARC policy (quarantine) with aggregate and forensic report addresses.
resource "cloudflare_record" "mail_dmarc" {
  zone_id  = var.cloudflare_zone_id
  type     = "TXT"
  name     = "_dmarc.viktorbarzin.me"
  content  = "\"v=DMARC1; p=quarantine; pct=100; fo=1; ri=3600; sp=quarantine; adkim=r; aspf=r; rua=mailto:dmarc@viktorbarzin.me,mailto:adb84997@inbox.ondmarc.com; ruf=mailto:dmarc@viktorbarzin.me,mailto:adb84997@inbox.ondmarc.com,mailto:postmaster@viktorbarzin.me;\""
  priority = 1
  ttl      = 1
  proxied  = false
}
# keyserver.viktorbarzin.me: unproxied A record with a one-hour TTL.
resource "cloudflare_record" "keyserver" {
  zone_id  = var.cloudflare_zone_id
  type     = "A"
  name     = "keyserver.viktorbarzin.me"
  content  = "130.162.165.220" # Oracle VPS
  priority = 1
  ttl      = 3600
  proxied  = false
}
# Turn on HTTP/3 (QUIC) for the zone's Cloudflare-proxied domains.
resource "cloudflare_zone_settings_override" "http3" {
  zone_id = var.cloudflare_zone_id

  settings {
    http3 = "on"
  }
}

View file

@ -0,0 +1,139 @@
# Contents for cloudflare tunnel
# Name of the TLS secret handed to the tls_secret module.
variable "tls_secret_name" {
  type = string
}

# Token injected into the cloudflared container as TUNNEL_TOKEN. It is a
# credential, so it is typed and marked sensitive to keep it out of plan output.
variable "cloudflare_tunnel_token" {
  type      = string
  sensitive = true
}
# Namespace for the tunnel pods, labelled with the caller-supplied tier and
# enrolled with Keel.
resource "kubernetes_namespace" "cloudflared" {
  metadata {
    name = "cloudflared"
    labels = {
      "keel.sh/enrolled" = "true"
      tier               = var.tier
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
    ignore_changes = [
      metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"],
    ]
  }
}
# Value of the `tier` label applied to the namespace and the deployment.
variable "tier" {
  type = string
}
# Sets up the TLS secret in the cloudflared namespace via the shared module.
module "tls_secret" {
  source = "../../../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.cloudflared.metadata[0].name
}
# Three cloudflared replicas running `cloudflared tunnel run`, authenticated
# through the TUNNEL_TOKEN environment variable.
resource "kubernetes_deployment" "cloudflared" {
  metadata {
    namespace = kubernetes_namespace.cloudflared.metadata[0].name
    name      = "cloudflared"

    annotations = {
      "reloader.stakater.com/search" = "true"
    }

    labels = {
      tier = var.tier
      app  = "cloudflared"
    }
  }

  spec {
    replicas = 3

    strategy {
      type = "RollingUpdate"
    }

    selector {
      match_labels = {
        app = "cloudflared"
      }
    }

    template {
      metadata {
        labels = {
          app = "cloudflared"
        }
      }

      spec {
        # Prefer one replica per node; ScheduleAnyway makes this a soft
        # constraint.
        topology_spread_constraint {
          when_unsatisfiable = "ScheduleAnyway"
          topology_key       = "kubernetes.io/hostname"
          max_skew           = 1
          label_selector {
            match_labels = {
              app = "cloudflared"
            }
          }
        }

        dns_config {
          option {
            name  = "ndots"
            value = "2"
          }
        }

        container {
          name = "cloudflared"
          # image = "wisdomsky/cloudflared-web:latest"
          # NOTE(review): no tag is given, so this resolves to :latest;
          # consider pinning a version.
          image   = "cloudflare/cloudflared"
          command = ["cloudflared", "tunnel", "run"]

          env {
            name  = "TUNNEL_TOKEN"
            value = var.cloudflare_tunnel_token
          }

          port {
            container_port = 14333
          }

          resources {
            limits = {
              memory = "128Mi"
            }
            requests = {
              memory = "128Mi"
              cpu    = "15m"
            }
          }
        }
      }
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
    ignore_changes = [spec[0].template[0].spec[0].dns_config]
  }
}
# At most one cloudflared pod may be voluntarily disrupted at a time.
resource "kubernetes_pod_disruption_budget_v1" "cloudflared" {
  metadata {
    namespace = kubernetes_namespace.cloudflared.metadata[0].name
    name      = "cloudflared"
  }

  spec {
    selector {
      match_labels = {
        app = "cloudflared"
      }
    }
    max_unavailable = "1"
  }
}
# Service for the cloudflared pods: TCP port 80 ("http") forwards to container
# port 14333.
resource "kubernetes_service" "cloudflared" {
  metadata {
    namespace = kubernetes_namespace.cloudflared.metadata[0].name
    name      = "cloudflared"
    labels = {
      "app" = "cloudflared"
    }
  }

  spec {
    selector = {
      app = "cloudflared"
    }

    port {
      name        = "http"
      protocol    = "TCP"
      port        = 80
      target_port = 14333
    }
  }
}

1
stacks/cloudflared/secrets Symbolic link
View file

@ -0,0 +1 @@
../../secrets

View file

@ -0,0 +1,8 @@
# Pull in the shared Terragrunt configuration located in a parent folder.
include "root" {
  path = find_in_parent_folders()
}

# Declared as a dependency without reading its outputs (skip_outputs = true).
dependency "infra" {
  skip_outputs = true
  config_path  = "../infra"
}

4
stacks/cnpg/main.tf Normal file
View file

@ -0,0 +1,4 @@
# CloudNativePG operator, labelled with the cluster tier.
module "cnpg" {
  tier   = local.tiers.cluster
  source = "./modules/cnpg"
}

View file

@ -0,0 +1,67 @@
# Value of the `tier` label applied to the operator namespace.
variable "tier" {
  type = string
}

# -----------------------------------------------------------------------------
# Namespace
# -----------------------------------------------------------------------------
resource "kubernetes_namespace" "cnpg_system" {
  metadata {
    name = "cnpg-system"
    labels = {
      tier = var.tier
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
    ignore_changes = [
      metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"],
    ]
  }
}
# -----------------------------------------------------------------------------
# CloudNativePG Operator — manages PostgreSQL clusters via CRDs
# https://cloudnative-pg.io/
# -----------------------------------------------------------------------------
resource "helm_release" "cnpg" {
  name       = "cnpg"
  repository = "https://cloudnative-pg.github.io/charts"
  chart      = "cloudnative-pg"
  version    = "0.27.1"

  # The namespace is owned by kubernetes_namespace.cnpg_system above, so Helm
  # must not create it.
  namespace        = kubernetes_namespace.cnpg_system.metadata[0].name
  create_namespace = false

  timeout = 300
  atomic  = true

  values = [yamlencode({
    replicaCount = 1

    crds = {
      create = true
    }

    resources = {
      limits = {
        memory = "256Mi"
      }
      requests = {
        memory = "256Mi"
        cpu    = "100m"
      }
    }

    # Tune the webhook-cert renewal threshold. The CNPG default is 7 days
    # remaining, which leaves no buffer when the cluster-health check (#22)
    # flags certs at <30d. Bumped to 30 days so the operator rotates well
    # before external monitoring notices. Cert lifetime stays at the chart
    # default of 90d.
    config = {
      data = {
        EXPIRING_CHECK_THRESHOLD = "30"
      }
    }
  })]
}
# NOTE: local-path-provisioner is already installed in the cluster
# (via cloud-init template) with StorageClass "local-path" (default).
# Its ReclaimPolicy is "Delete". For CNPG clusters, set
# .spec.storage.pvcTemplate.storageClassName = "local-path" in the
# Cluster CR. CNPG handles PVC lifecycle independently.

1
stacks/cnpg/secrets Symbolic link
View file

@ -0,0 +1 @@
../../secrets

View file

@ -0,0 +1,8 @@
# Pull in the shared Terragrunt configuration located in a parent folder.
include "root" {
  path = find_in_parent_folders()
}

# Declared as a dependency without reading its outputs (skip_outputs = true).
dependency "infra" {
  skip_outputs = true
  config_path  = "../infra"
}

86
stacks/coturn/.terraform.lock.hcl generated Normal file
View file

@ -0,0 +1,86 @@
# This file is maintained automatically by "terraform init".
# Manual edits may be lost in future updates.
provider "registry.terraform.io/cloudflare/cloudflare" {
version = "4.52.7"
constraints = "~> 4.0"
hashes = [
"h1:pPItIWii5oymR+geZB219ROSPuSODPLTlM4S/u8xLvM=",
"zh:0c904ce31a4c6c4a5b3bf7ff1560e77c0cc7e2450c8553ded8e8c90398e1418b",
"zh:36183d310c36373fe4cb936b83c595c6fd3b0a94bc7827f28e5789ccbf59752e",
"zh:556a568a6f0235e8f41647de9e4d3a1e7b1d6502df8b19b54ec441f1c653ea10",
"zh:633ebbd5b0245e75e500ef9be4d9e62288f97e8da3baaa51323892a786d90285",
"zh:6acfe60cf52a65ba8f044f748548d2119e7f4fd7f8ebcb14698960d87c68f529",
"zh:890df766e9b839623b1f0437355032a3c006226a6c200cd911e15ee1a9014e9f",
"zh:904acc31ebb9d6ef68c792074b30532ee61bf515f19e0a3c75b46f126cca1f13",
"zh:a1d0a81246afc8750286d3f6fe7a8fbe6460dd2662407b28dbfbabb612e5fa9d",
"zh:a41a36fe253fc365fe2b7ffc749624688b2693b4634862fda161179ab100029f",
"zh:a7ef269e77ffa8715c8945a2c14322c7ff159ea44c15f62505f3cbb2cae3b32d",
"zh:b01aa3bed30610633b762df64332b26f8844a68c3960cebcb30f04918efc67fe",
"zh:b069cc2cd18cae10757df3ae030508eac8d55de7e49eda7a5e3e11f2f7fe6455",
"zh:b2d2c6313729ebb7465dceece374049e2d08bda34473901be9ff46a8836d42b2",
"zh:db0e114edaf4bc2f3d4769958807c83022bfbc619a00bdf4c4bd17faa4ab2d8b",
"zh:ecc0aa8b9044f664fd2aaf8fa992d976578f78478980555b4b8f6148e8d1a5fe",
]
}
provider "registry.terraform.io/hashicorp/helm" {
version = "3.1.1"
hashes = [
"h1:47CqNwkxctJtL/N/JuEj+8QMg8mRNI/NWeKO5/ydfZU=",
"h1:5b2ojWKT0noujHiweCds37ZreRFRQLNaErdJLusJN88=",
"zh:1a6d5ce931708aec29d1f3d9e360c2a0c35ba5a54d03eeaff0ce3ca597cd0275",
"zh:3411919ba2a5941801e677f0fea08bdd0ae22ba3c9ce3309f55554699e06524a",
"zh:81b36138b8f2320dc7f877b50f9e38f4bc614affe68de885d322629dd0d16a29",
"zh:95a2a0a497a6082ee06f95b38bd0f0d6924a65722892a856cfd914c0d117f104",
"zh:9d3e78c2d1bb46508b972210ad706dd8c8b106f8b206ecf096cd211c54f46990",
"zh:a79139abf687387a6efdbbb04289a0a8e7eaca2bd91cdc0ce68ea4f3286c2c34",
"zh:aaa8784be125fbd50c48d84d6e171d3fb6ef84a221dbc5165c067ce05faab4c8",
"zh:afecd301f469975c9d8f350cc482fe656e082b6ab0f677d1a816c3c615837cc1",
"zh:c54c22b18d48ff9053d899d178d9ffef7d9d19785d9bf310a07d648b7aac075b",
"zh:db2eefd55aea48e73384a555c72bac3f7d428e24147bedb64e1a039398e5b903",
"zh:ee61666a233533fd2be971091cecc01650561f1585783c381b6f6e8a390198a4",
"zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c",
]
}
provider "registry.terraform.io/hashicorp/kubernetes" {
version = "3.0.1"
hashes = [
"h1:P0c8knzZnouTNFIRij8IS7+pqd0OKaFDYX0j4GRsiqo=",
"h1:vyHdH0p6bf9xp1NPePObAJkXTJb/I09FQQmmevTzZe0=",
"zh:02d55b0b2238fd17ffa12d5464593864e80f402b90b31f6e1bd02249b9727281",
"zh:20b93a51bfeed82682b3c12f09bac3031f5bdb4977c47c97a042e4df4fb2f9ba",
"zh:6e14486ecfaee38c09ccf33d4fdaf791409f90795c1b66e026c226fad8bc03c7",
"zh:8d0656ff422df94575668e32c310980193fccb1c28117e5c78dd2d4050a760a6",
"zh:9795119b30ec0c1baa99a79abace56ac850b6e6fbce60e7f6067792f6eb4b5f4",
"zh:b388c87acc40f6bd9620f4e23f01f3c7b41d9b88a68d5255dec0a72f0bdec249",
"zh:b59abd0a980649c2f97f172392f080eaeb18e486b603f83bf95f5d93aeccc090",
"zh:ba6e3060fddf4a022087d8f09e38aa0001c705f21170c2ded3d1c26c12f70d97",
"zh:c12626d044b1d5501cf95ca78cbe507c13ad1dd9f12d4736df66eb8e5f336eb8",
"zh:c55203240d50f4cdeb3df1e1760630d677679f5b1a6ffd9eba23662a4ad05119",
"zh:ea206a5a32d6e0d6e32f1849ad703da9a28355d9c516282a8458b5cf1502b2a1",
"zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c",
]
}
provider "registry.terraform.io/hashicorp/vault" {
version = "4.8.0"
constraints = "~> 4.0"
hashes = [
"h1:GPfhH6dr1LY0foPBDYv9bEGifx7eSwYqFcEAOWOUxLk=",
"h1:aHqgWQhDBMeZO9iUKwJYMlh4q+xNMUlMIcjRbF4d02Y=",
"zh:269ab13433f67684012ae7e15876532b0312f5d0d2002a9cf9febb1279ce5ea6",
"zh:4babc95bf0c40eb85005db1dc2ca403c46be4a71dd3e409db3711a56f7a5ca0e",
"zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3",
"zh:86e27c1c625ecc24446a11eeffc3ac319b36c2b4e51251db8579256a0dbcf136",
"zh:a32f31da94824009e26b077374440b52098aecb93c92ff55dc3d31dd37c4ea25",
"zh:be0a18c6c0425518bab4fbffd82078b82036a88503b5d76064de551c9f646cbf",
"zh:be5a77fdfd36863ebeec79cd12b1d13322ffad6821d157a0b279789fa06b5937",
"zh:be8317d142a3caad74c7d936039ae27076a1b2b8312ef5208e2871a5f525977c",
"zh:c94a84895a3d9954b80e983eed4603330a5cdbbd8eef5b3c99278c2d1402ef3c",
"zh:de1fb712784dd8415f011ca5346a34f87fab6046c730557615247e511dbc7d98",
"zh:e3eafae7da550f86cae395d6660b2a0e93ec8d2b0e0e5ef982ec762e961fc952",
"zh:ff35fb1ab6add288f0f368981e56f780b50405accd1937131cba1137999c8d83",
]
}

254
stacks/coturn/main.tf Normal file
View file

@ -0,0 +1,254 @@
# Name of the TLS secret handed to the tls_secret module.
variable "tls_secret_name" {
  sensitive = true
  type      = string
}

# Written into turnserver.conf as external-ip.
variable "public_ip" {
  type = string
}
# ExternalSecret that extracts every key of the `coturn` entry from the
# vault-kv ClusterSecretStore into the `coturn-secrets` Secret, refreshed every
# 15 minutes.
resource "kubernetes_manifest" "external_secret" {
  depends_on = [kubernetes_namespace.coturn]

  manifest = {
    apiVersion = "external-secrets.io/v1beta1"
    kind       = "ExternalSecret"

    metadata = {
      namespace = "coturn"
      name      = "coturn-secrets"
    }

    spec = {
      refreshInterval = "15m"

      secretStoreRef = {
        kind = "ClusterSecretStore"
        name = "vault-kv"
      }

      target = {
        name = "coturn-secrets"
      }

      dataFrom = [
        {
          extract = {
            key = "coturn"
          }
        },
      ]
    }
  }
}
# Reads back the Secret materialised by the ExternalSecret above so that
# turn_secret can be rendered into turnserver.conf.
data "kubernetes_secret" "eso_secrets" {
  depends_on = [kubernetes_manifest.external_secret]

  metadata {
    namespace = kubernetes_namespace.coturn.metadata[0].name
    name      = "coturn-secrets"
  }
}
locals {
  # Realm, also used to build the server name (turn.<realm>).
  turn_realm = "viktorbarzin.me"

  # STUN/TURN signaling port (UDP and TCP).
  turn_port = 3478

  # Small relay range (49152-49252, bounds inclusive) — roughly 100 ports is
  # plenty for a home lab (~50 concurrent streams).
  min_port = 49152
  max_port = 49252
}
# Namespace for the coturn stack (edge tier, enrolled with Keel).
resource "kubernetes_namespace" "coturn" {
  metadata {
    name = "coturn"
    labels = {
      "keel.sh/enrolled" = "true"
      tier               = local.tiers.edge
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
    ignore_changes = [
      metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"],
    ]
  }
}
# Sets up the TLS secret in the coturn namespace via the shared module.
module "tls_secret" {
  source = "../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.coturn.metadata[0].name
}
# Renders turnserver.conf into the coturn-config ConfigMap. Port, realm, relay
# range and external IP come from locals/variables; static-auth-secret is read
# from the ESO-managed coturn-secrets Secret (key turn_secret).
# NOTE(review): the shared secret therefore ends up in plain text inside a
# ConfigMap — confirm this is acceptable.
# NOTE(review): `no-stdout-log` + `syslog` look like they route logs away from
# stdout — confirm the logs are still collectable from the container.
resource "kubernetes_config_map" "coturn_config" {
metadata {
name = "coturn-config"
namespace = kubernetes_namespace.coturn.metadata[0].name
}
data = {
# The `<<-` form strips the common leading indentation of the heredoc body.
"turnserver.conf" = <<-EOF
# TURN server configuration
listening-port=${local.turn_port}
fingerprint
lt-cred-mech
use-auth-secret
static-auth-secret=${data.kubernetes_secret.eso_secrets.data["turn_secret"]}
realm=${local.turn_realm}
server-name=turn.${local.turn_realm}
# Network use 0.0.0.0, coturn auto-detects pod IP
listening-ip=0.0.0.0
external-ip=${var.public_ip}
# Media relay port range (narrow 100 ports)
min-port=${local.min_port}
max-port=${local.max_port}
# Logging
verbose
no-stdout-log
syslog
# Security
no-multicast-peers
no-cli
no-tlsv1
no-tlsv1_1
# Performance
total-quota=100
stale-nonce=600
max-bps=0
EOF
}
}
# Single-replica coturn server. Its configuration is mounted read-only from the
# coturn-config ConfigMap at /etc/turnserver.
resource "kubernetes_deployment" "coturn" {
  metadata {
    namespace = kubernetes_namespace.coturn.metadata[0].name
    name      = "coturn"

    annotations = {
      "reloader.stakater.com/auto" = "true"
    }

    labels = {
      tier = local.tiers.edge
      app  = "coturn"
    }
  }

  spec {
    replicas = 1

    # Surge-first rollout: max_unavailable = 0, max_surge = 1.
    strategy {
      type = "RollingUpdate"
      rolling_update {
        max_surge       = 1
        max_unavailable = 0
      }
    }

    selector {
      match_labels = {
        app = "coturn"
      }
    }

    template {
      metadata {
        labels = {
          app = "coturn"
        }
        annotations = {
          "diun.enable"       = "true"
          "diun.include_tags" = "^\\d+\\.\\d+\\.\\d+-r\\d+$"
        }
      }

      spec {
        volume {
          name = "config"
          config_map {
            name = kubernetes_config_map.coturn_config.metadata[0].name
          }
        }

        container {
          name  = "coturn"
          image = "coturn/coturn:4.10.0-r1"
          args  = ["-c", "/etc/turnserver/turnserver.conf"]

          # STUN/TURN signaling port, exposed over both UDP and TCP.
          port {
            name           = "turn-udp"
            protocol       = "UDP"
            container_port = local.turn_port
          }
          port {
            name           = "turn-tcp"
            protocol       = "TCP"
            container_port = local.turn_port
          }

          volume_mount {
            name       = "config"
            read_only  = true
            mount_path = "/etc/turnserver"
          }

          resources {
            limits = {
              memory = "64Mi"
            }
            requests = {
              memory = "64Mi"
              cpu    = "10m"
            }
          }
        }
      }
    }
  }

  lifecycle {
    ignore_changes = [
      spec[0].template[0].spec[0].dns_config,         # KYVERNO_LIFECYCLE_V1
      spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE — Keel manages tag updates
      metadata[0].annotations["keel.sh/policy"],
      metadata[0].annotations["keel.sh/trigger"],
      metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2
      metadata[0].annotations["keel.sh/match-tag"],
      metadata[0].annotations["kubernetes.io/change-cause"],
      metadata[0].annotations["deployment.kubernetes.io/revision"],
      spec[0].template[0].metadata[0].annotations["keel.sh/update-time"], # KEEL_LIFECYCLE_V1
    ]
  }
}
# LoadBalancer service (MetalLB) exposing the STUN/TURN signaling port and the relay port range
resource "kubernetes_service" "coturn" {
  metadata {
    name      = "coturn"
    namespace = kubernetes_namespace.coturn.metadata[0].name

    annotations = {
      "metallb.io/allow-shared-ip" = "shared"
      "metallb.io/loadBalancerIPs" = "10.0.20.200"
    }
  }

  spec {
    type = "LoadBalancer"

    selector = {
      app = "coturn"
    }

    # STUN/TURN signaling, one entry per transport protocol
    port {
      name        = "turn-udp"
      protocol    = "UDP"
      port        = local.turn_port
      target_port = local.turn_port
    }

    port {
      name        = "turn-tcp"
      protocol    = "TCP"
      port        = local.turn_port
      target_port = local.turn_port
    }

    # One UDP entry per relay port, local.min_port through local.max_port inclusive
    dynamic "port" {
      for_each = range(local.min_port, local.max_port + 1)
      iterator = relay

      content {
        name        = "relay-${relay.value}"
        protocol    = "UDP"
        port        = relay.value
        target_port = relay.value
      }
    }
  }
}

View file

@ -0,0 +1,33 @@
# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa
terraform {
required_providers {
vault = {
source = "hashicorp/vault"
version = "~> 4.0"
}
cloudflare = {
source = "cloudflare/cloudflare"
version = "~> 4"
}
}
}
variable "kube_config_path" {
type = string
default = "~/.kube/config"
}
provider "kubernetes" {
config_path = var.kube_config_path
}
provider "helm" {
kubernetes = {
config_path = var.kube_config_path
}
}
provider "vault" {
address = "https://vault.viktorbarzin.me"
skip_child_token = true
}

1
stacks/coturn/secrets Symbolic link
View file

@ -0,0 +1 @@
../../secrets

View file

@ -0,0 +1,13 @@
# Inherit the shared root Terragrunt configuration.
include "root" {
  path = find_in_parent_folders()
}

# Dependencies on sibling stacks; their outputs are not read (skip_outputs = true).
dependency "platform" {
  skip_outputs = true
  config_path  = "../platform"
}

dependency "vault" {
  skip_outputs = true
  config_path  = "../vault"
}

32
stacks/crowdsec/main.tf Normal file
View file

@ -0,0 +1,32 @@
# =============================================================================
# CrowdSec Stack - Security/WAF
# =============================================================================
# Inputs of the CrowdSec stack.
variable "tls_secret_name" {
  type = string
}

variable "mysql_host" {
  type = string
}

variable "postgresql_host" {
  type = string
}
# Reads the "platform" entry from the "secret" KV v2 mount in Vault.
data "vault_kv_secret_v2" "secrets" {
  name  = "platform"
  mount = "secret"
}
locals {
  # The homepage_credentials key holds a JSON document; decode it once here.
  homepage_credentials = jsondecode(
    data.vault_kv_secret_v2.secrets.data["homepage_credentials"]
  )
}
# Instantiates the local crowdsec module with values taken from stack inputs and Vault.
module "crowdsec" {
  source = "./modules/crowdsec"

  tier            = local.tiers.cluster
  tls_secret_name = var.tls_secret_name

  # Database endpoints
  mysql_host      = var.mysql_host
  postgresql_host = var.postgresql_host

  # Homepage widget credentials, from local.homepage_credentials
  homepage_username = local.homepage_credentials["crowdsec"]["username"]
  homepage_password = local.homepage_credentials["crowdsec"]["password"]

  # Secrets read straight from the Vault "platform" entry
  enroll_key                     = data.vault_kv_secret_v2.secrets.data["crowdsec_enroll_key"]
  db_password                    = data.vault_kv_secret_v2.secrets.data["crowdsec_db_password"]
  crowdsec_dash_api_key          = data.vault_kv_secret_v2.secrets.data["crowdsec_dash_api_key"]
  crowdsec_dash_machine_id       = data.vault_kv_secret_v2.secrets.data["crowdsec_dash_machine_id"]
  crowdsec_dash_machine_password = data.vault_kv_secret_v2.secrets.data["crowdsec_dash_machine_password"]
  slack_webhook_url              = data.vault_kv_secret_v2.secrets.data["alertmanager_slack_api_url"]
}

View file

@ -0,0 +1,44 @@
# Example ingress-nginx controller values that load the CrowdSec Lua bouncer plugin.
# An init container copies the plugin into a shared emptyDir volume, which the
# controller mounts under /etc/nginx/lua/plugins/crowdsec.
# Values written as <...> are placeholders and must be replaced before use.
controller:
extraVolumes:
- name: crowdsec-bouncer-plugin
emptyDir: {}
extraInitContainers:
- name: init-clone-crowdsec-bouncer
image: crowdsecurity/lua-bouncer-plugin
imagePullPolicy: IfNotPresent
env:
- name: API_URL
value: "http://crowdsec-service.crowdsec.svc.cluster.local:8080" # crowdsec lapi service-name
- name: API_KEY
value: "<API KEY>" # generated with `cscli bouncers add -n <bouncer_name>`
- name: BOUNCER_CONFIG
value: "/crowdsec/crowdsec-bouncer.conf"
- name: CAPTCHA_PROVIDER
value: "recaptcha" # valid providers are recaptcha, hcaptcha, turnstile
- name: SECRET_KEY
value: "<your-captcha-secret-key>" # Needed for captcha support; otherwise remove this env var
- name: SITE_KEY
value: "<your-captcha-site-key>" # Needed for captcha support; otherwise remove this env var
- name: BAN_TEMPLATE_PATH
value: /etc/nginx/lua/plugins/crowdsec/templates/ban.html
- name: CAPTCHA_TEMPLATE_PATH
value: /etc/nginx/lua/plugins/crowdsec/templates/captcha.html
# Run the image's start script, then copy the plugin files into the shared volume.
command:
[
"sh",
"-c",
"sh /docker_start.sh; mkdir -p /lua_plugins/crowdsec/; cp -R /crowdsec/* /lua_plugins/crowdsec/",
]
volumeMounts:
- name: crowdsec-bouncer-plugin
mountPath: /lua_plugins
# The controller sees only the "crowdsec" sub-directory of the shared volume.
extraVolumeMounts:
- name: crowdsec-bouncer-plugin
mountPath: /etc/nginx/lua/plugins/crowdsec
subPath: crowdsec
config:
plugins: "crowdsec"
lua-shared-dicts: "crowdsec_cache: 50m"
server-snippet: |
lua_ssl_trusted_certificate "/etc/ssl/certs/ca-certificates.crt"; # If you want captcha support otherwise remove this line
resolver local=on ipv6=off;

View file

@ -0,0 +1,451 @@
# -----------------------------------------------------------------------------
# Module inputs.
# Every input is typed, and every secret-bearing input is marked sensitive so
# its value is redacted from plan/apply output (these values are templated into
# the Helm values or set as container environment variables in this module).
# -----------------------------------------------------------------------------
variable "tls_secret_name" {
  type = string
}

# Homepage widget credentials, templated into values.yaml.
variable "homepage_username" {
  type = string
}

variable "homepage_password" {
  type      = string
  sensitive = true
}

# Database password, passed to the values.yaml template as DB_PASSWORD.
variable "db_password" {
  type      = string
  sensitive = true
}

# Enrollment key, passed to the values.yaml template as ENROLL_KEY.
variable "enroll_key" {
  type      = string
  sensitive = true
}

variable "crowdsec_dash_api_key" {
  type      = string
  sensitive = true
}

variable "crowdsec_dash_machine_id" { type = string } # used for web dash

variable "crowdsec_dash_machine_password" {
  type      = string
  sensitive = true
}

# Tier label applied to the namespace and workloads of this module.
variable "tier" {
  type = string
}

# Slack webhook, passed to the values.yaml template as SLACK_WEBHOOK_URL.
variable "slack_webhook_url" {
  type      = string
  sensitive = true
}

variable "mysql_host" {
  type = string
}

variable "postgresql_host" {
  type = string
}
# Invokes the shared setup_tls_secret module for the crowdsec namespace.
module "tls_secret" {
  source = "../../../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.crowdsec.metadata[0].name
}
# Namespace that holds every CrowdSec workload created by this module.
resource "kubernetes_namespace" "crowdsec" {
  metadata {
    name = "crowdsec"

    labels = {
      "keel.sh/enrolled"                 = "true"
      "resource-governance/custom-quota" = "true"
      tier                               = var.tier
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: the goldilocks-vpa-auto-mode ClusterPolicy stamps this
    # label on every namespace, so it must not be reported as drift.
    ignore_changes = [
      metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"],
    ]
  }
}
# Custom leaky-bucket scenarios (HTTP 403 and HTTP 429 abuse), one file per key.
resource "kubernetes_config_map" "crowdsec_custom_scenarios" {
  metadata {
    name      = "crowdsec-custom-scenarios"
    namespace = kubernetes_namespace.crowdsec.metadata[0].name

    labels = {
      "app.kubernetes.io/name" = "crowdsec"
    }
  }

  data = {
    # Repeated HTTP 403 responses for the same source IP
    "http-403-abuse.yaml" = <<-YAML
      type: leaky
      name: crowdsecurity/http-403-abuse
      description: "Detect IPs triggering too many HTTP 403s in NGINX ingress logs"
      filter: "evt.Meta.log_type == 'http_access-log' && evt.Parsed.status == '403'"
      groupby: "evt.Meta.source_ip"
      leakspeed: "2s"
      capacity: 10
      blackhole: 5m
      labels:
      service: http
      behavior: abusive_403
      remediation: true
    YAML

    # Repeated HTTP 429 (rate-limit) responses for the same source IP
    "http-429-abuse.yaml" = <<-YAML
      type: leaky
      name: crowdsecurity/http-429-abuse
      description: "Detect IPs repeatedly triggering rate-limit (HTTP 429)"
      filter: "evt.Meta.log_type == 'http_access-log' && evt.Parsed.status == '429'"
      groupby: "evt.Meta.source_ip"
      leakspeed: "10s"
      capacity: 5
      blackhole: 1m
      labels:
      service: http
      behavior: rate_limit_abuse
      remediation: true
    YAML
  }
}
# Whitelist for trusted IPs that should never be blocked.
# The single whitelist.yaml key holds two YAML documents separated by "---":
# an IP whitelist, and an expression whitelist for Immich API request paths on
# immich.viktorbarzin.me.
resource "kubernetes_config_map" "crowdsec_whitelist" {
metadata {
name = "crowdsec-whitelist"
namespace = kubernetes_namespace.crowdsec.metadata[0].name
labels = {
"app.kubernetes.io/name" = "crowdsec"
}
}
data = {
# NOTE(review): the trusted IP is hard-coded inside the heredoc; it has to be
# edited here if that address changes.
"whitelist.yaml" = <<-YAML
name: crowdsecurity/whitelist-trusted-ips
description: "Whitelist for trusted IPs that should never be blocked"
whitelist:
reason: "Trusted IP - never block"
ip:
- "176.12.22.76"
---
name: viktor/immich-asset-paths-whitelist
description: "Don't penalise legit Immich timeline bursts (mobile scrub, web grid)"
whitelist:
reason: "Immich asset endpoints are auth-gated; mobile scrub legitimately bursts"
expression:
- >
evt.Parsed.target_fqdn == "immich.viktorbarzin.me" &&
(evt.Parsed.request startsWith "/api/assets/" ||
evt.Parsed.request startsWith "/api/timeline/" ||
evt.Parsed.request startsWith "/api/asset/" ||
evt.Parsed.request startsWith "/api/search/" ||
evt.Parsed.request startsWith "/api/memories" ||
evt.Parsed.request startsWith "/api/albums" ||
evt.Parsed.request startsWith "/api/activities")
YAML
}
}
# Acquisition file that makes CrowdSec listen for syslog on port 514 and label
# the received lines as type "pf" (pfSense firewall log ingestion).
resource "kubernetes_config_map" "crowdsec_syslog_acquisition" {
  metadata {
    name      = "crowdsec-syslog-acquisition"
    namespace = kubernetes_namespace.crowdsec.metadata[0].name

    labels = {
      "app.kubernetes.io/name" = "crowdsec"
    }
  }

  data = {
    "syslog.yaml" = <<-YAML
      source: syslog
      listen_addr: "0.0.0.0"
      listen_port: 514
      labels:
      type: pf
    YAML
  }
}
# CrowdSec Helm chart; values.yaml in this module is rendered with the module's
# inputs before being handed to the release.
resource "helm_release" "crowdsec" {
  name       = "crowdsec"
  repository = "https://crowdsecurity.github.io/helm-charts"
  chart      = "crowdsec"
  version    = "0.21.0"

  namespace        = kubernetes_namespace.crowdsec.metadata[0].name
  create_namespace = true

  values = [
    templatefile("${path.module}/values.yaml", {
      homepage_username = var.homepage_username
      homepage_password = var.homepage_password
      DB_PASSWORD       = var.db_password
      ENROLL_KEY        = var.enroll_key
      SLACK_WEBHOOK_URL = var.slack_webhook_url
      mysql_host        = var.mysql_host
      postgresql_host   = var.postgresql_host
    })
  ]

  # Wait for the release (including its jobs) for up to 1200 seconds.
  atomic        = true
  wait          = true
  wait_for_jobs = true
  timeout       = 1200
}
# NodePort service carrying pfSense syslog traffic to the CrowdSec agent pods.
# pfSense sends firewall logs to 10.0.20.202:30514 (any k8s node IP works)
resource "kubernetes_service" "crowdsec_syslog" {
  metadata {
    name      = "crowdsec-syslog"
    namespace = kubernetes_namespace.crowdsec.metadata[0].name

    labels = {
      app = "crowdsec-syslog"
    }
  }

  spec {
    type = "NodePort"

    # Targets pods labelled k8s-app=crowdsec, type=agent
    selector = {
      type      = "agent"
      "k8s-app" = "crowdsec"
    }

    port {
      name        = "syslog-udp"
      protocol    = "UDP"
      node_port   = 30514
      port        = 514
      target_port = 514
    }
  }
}
# Custom dashboard used to lift a block after blocklisting myself.
resource "kubernetes_deployment" "crowdsec-web" {
  metadata {
    name      = "crowdsec-web"
    namespace = kubernetes_namespace.crowdsec.metadata[0].name

    labels = {
      app                             = "crowdsec_web"
      tier                            = var.tier
      "kubernetes.io/cluster-service" = "true"
    }
  }

  spec {
    replicas = 1

    selector {
      match_labels = {
        app = "crowdsec_web"
      }
    }

    strategy {
      type = "RollingUpdate"
    }

    template {
      metadata {
        labels = {
          app                             = "crowdsec_web"
          "kubernetes.io/cluster-service" = "true"
        }
      }

      spec {
        priority_class_name = "tier-1-cluster"

        dns_config {
          option {
            name  = "ndots"
            value = "2"
          }
        }

        container {
          name  = "crowdsec-web"
          image = "viktorbarzin/crowdsec_web"

          port {
            name           = "http"
            protocol       = "TCP"
            container_port = 8000
          }

          # CrowdSec API endpoint and the credentials the dashboard uses against it
          env {
            name  = "CS_API_URL"
            value = "http://crowdsec-service.crowdsec.svc.cluster.local:8080/v1"
          }

          env {
            name  = "CS_API_KEY"
            value = var.crowdsec_dash_api_key
          }

          env {
            name  = "CS_MACHINE_ID"
            value = var.crowdsec_dash_machine_id
          }

          env {
            name  = "CS_MACHINE_PASSWORD"
            value = var.crowdsec_dash_machine_password
          }

          # Memory request equals the memory limit; no CPU limit is set.
          resources {
            limits = {
              memory = "128Mi"
            }

            requests = {
              cpu    = "15m"
              memory = "128Mi"
            }
          }
        }
      }
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
    ignore_changes = [
      spec[0].template[0].spec[0].dns_config,
    ]
  }
}
# Service in front of the crowdsec-web pods: port 80 forwards to container port 8000.
resource "kubernetes_service" "crowdsec-web" {
  metadata {
    name      = "crowdsec-web"
    namespace = kubernetes_namespace.crowdsec.metadata[0].name

    labels = {
      "app" = "crowdsec_web"
    }
  }

  spec {
    port {
      target_port = "8000"
      port        = "80"
    }

    selector = {
      app = "crowdsec_web"
    }
  }
}
# Ingress for crowdsec-web, built by the shared ingress_factory module.
module "ingress" {
  source = "../../../../modules/kubernetes/ingress_factory"

  name      = "crowdsec-web"
  namespace = kubernetes_namespace.crowdsec.metadata[0].name

  auth             = "required"
  dns_type         = "proxied"
  exclude_crowdsec = true
  tls_secret_name  = var.tls_secret_name
}
# CronJob to import public blocklists into CrowdSec
# https://github.com/wolffcatskyy/crowdsec-blocklist-import
# Uses kubectl exec to run in an existing CrowdSec agent pod that's already registered.
#
# The job downloads import.sh, copies it into one Running agent pod and executes
# it there with MODE=native.
#
# NOTE(review): both the container image (bitnami/kubectl:latest) and the script
# (fetched from the repository's main branch) are unpinned, so every run may
# execute different code inside the agent pod; consider pinning both.
resource "kubernetes_cron_job_v1" "crowdsec_blocklist_import" {
  metadata {
    name      = "crowdsec-blocklist-import"
    namespace = kubernetes_namespace.crowdsec.metadata[0].name
    labels = {
      app  = "crowdsec-blocklist-import"
      tier = var.tier
    }
  }

  spec {
    # Run daily at 4 AM
    schedule                      = "0 4 * * *"
    timezone                      = "Europe/London"
    concurrency_policy            = "Forbid"
    successful_jobs_history_limit = 3
    failed_jobs_history_limit     = 3

    job_template {
      metadata {
        labels = {
          app = "crowdsec-blocklist-import"
        }
      }

      spec {
        backoff_limit = 3

        template {
          metadata {
            labels = {
              app = "crowdsec-blocklist-import"
            }
          }

          spec {
            service_account_name = kubernetes_service_account.blocklist_import.metadata[0].name
            restart_policy       = "OnFailure"

            container {
              name    = "blocklist-import"
              image   = "bitnami/kubectl:latest"
              command = ["/bin/bash", "-c"]

              # Pod lookup: the jsonpath uses .items[*] rather than .items[0].
              # Indexing [0] on an empty list makes kubectl exit non-zero, which
              # under "set -e" aborts the script before the explicit error
              # message is printed. Only Running pods are considered, and the
              # first returned name is used.
              args = [
                <<-EOF
                set -e

                echo "Finding CrowdSec agent pod..."
                AGENT_POD=$(kubectl get pods -n crowdsec -l k8s-app=crowdsec,type=agent \
                  --field-selector=status.phase=Running \
                  -o jsonpath='{.items[*].metadata.name}' | cut -d' ' -f1)
                if [ -z "$AGENT_POD" ]; then
                  echo "ERROR: Could not find CrowdSec agent pod"
                  exit 1
                fi
                echo "Using agent pod: $AGENT_POD"

                # Download the import script
                echo "Downloading blocklist import script..."
                curl -fsSL -o /tmp/import.sh \
                  https://raw.githubusercontent.com/wolffcatskyy/crowdsec-blocklist-import/main/import.sh
                chmod +x /tmp/import.sh

                # Copy script to agent pod and execute
                echo "Copying script to agent pod and executing..."
                kubectl cp /tmp/import.sh "crowdsec/$AGENT_POD:/tmp/import.sh"
                kubectl exec -n crowdsec "$AGENT_POD" -- /bin/bash -c '
                  set -e
                  # Run with native mode since we are inside the CrowdSec container
                  export MODE=native
                  export DECISION_DURATION=168h
                  export FETCH_TIMEOUT=60
                  export LOG_LEVEL=INFO
                  /tmp/import.sh
                  # Cleanup
                  rm -f /tmp/import.sh
                '
                echo "Blocklist import completed successfully!"
                EOF
              ]
            }
          }
        }
      }
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
    ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
  }
}
# Identity the blocklist import job runs as; the pod and exec permissions are
# attached to it through the crowdsec-blocklist-import Role and RoleBinding.
resource "kubernetes_service_account" "blocklist_import" {
  metadata {
    namespace = kubernetes_namespace.crowdsec.metadata[0].name
    name      = "crowdsec-blocklist-import"
  }
}
# Namespaced permissions for the import job: look up pods and exec into them.
resource "kubernetes_role" "blocklist_import" {
  metadata {
    namespace = kubernetes_namespace.crowdsec.metadata[0].name
    name      = "crowdsec-blocklist-import"
  }

  # Find the agent pod
  rule {
    api_groups = [""]
    resources  = ["pods"]
    verbs      = ["get", "list"]
  }

  # Open an exec session in it
  rule {
    api_groups = [""]
    resources  = ["pods/exec"]
    verbs      = ["create"]
  }
}
# Grants the blocklist import Role to the blocklist import ServiceAccount.
resource "kubernetes_role_binding" "blocklist_import" {
  metadata {
    namespace = kubernetes_namespace.crowdsec.metadata[0].name
    name      = "crowdsec-blocklist-import"
  }

  subject {
    kind      = "ServiceAccount"
    namespace = kubernetes_namespace.crowdsec.metadata[0].name
    name      = kubernetes_service_account.blocklist_import.metadata[0].name
  }

  role_ref {
    kind      = "Role"
    api_group = "rbac.authorization.k8s.io"
    name      = kubernetes_role.blocklist_import.metadata[0].name
  }
}
# Custom ResourceQuota for CrowdSec: it needs more than the default 1-cluster quota
# because it runs DaemonSet agents (1 per worker node) + 3 LAPI replicas + web UI
resource "kubernetes_resource_quota" "crowdsec" {
  metadata {
    namespace = kubernetes_namespace.crowdsec.metadata[0].name
    name      = "crowdsec-quota"
  }

  spec {
    hard = {
      pods              = "30"
      "requests.cpu"    = "4"
      "requests.memory" = "8Gi"
      "limits.memory"   = "16Gi"
    }
  }
}

View file

@ -0,0 +1,264 @@
# values from - https://github.com/crowdsecurity/helm-charts/blob/main/charts/crowdsec/values.yaml
container_runtime: containerd
# Pin the image tag to v1.7.8 — the chart 0.21.0 defaults appVersion to
# v1.7.3 but Keel had auto-bumped the running pods to v1.7.8 on 2026-05-16
# and they ran fine with CAPI working for ~8 days. The 2026-05-24 TF apply
# re-rendered the deployment from chart defaults (v1.7.3) and CAPI auth
# started returning 403 on every fresh replica. Pinning here makes the
# image survive future TF applies independently of the chart's appVersion.
image:
tag: "v1.7.8"
# CrowdSec agent settings: resources, which pod logs to read, the collections to
# install, and the extra config files mounted from ConfigMaps.
agent:
  resources:
    requests:
      cpu: 25m
      memory: 128Mi
    limits:
      memory: 512Mi
  priorityClassName: "tier-1-cluster"
  # Pods (present on the node) whose logs the agent should process
  acquisition:
    # The namespace where the pod is located
    - namespace: traefik
      # The pod name
      podName: traefik-*
      # as in crowdsec configuration, we need to specify the program name so the parser will match and parse logs
      program: traefik
    # Mailserver logs for SMTP/IMAP brute-force detection
    - namespace: mailserver
      podName: mailserver-*
      program: postfix
    - namespace: mailserver
      podName: mailserver-*
      program: dovecot
  # Environment variables for the agent container
  env:
    # As it's a test, we don't want to share signals with CrowdSec so disable the Online API.
    # - name: DISABLE_ONLINE_API
    #   value: "true"
    # As we are running Traefik, we want to install the Traefik collection
    - name: COLLECTIONS
      value: "crowdsecurity/traefik crowdsecurity/base-http-scenarios crowdsecurity/http-cve crowdsecurity/pfsense firewallservices/pf crowdsecurity/postfix crowdsecurity/dovecot crowdsecurity/sshd"
    - name: SCENARIOS
      value: ""
      # value: "crowdsecurity/http-crawl-aggressive"
  # Mount custom scenarios, the whitelist and the syslog acquisition file.
  # The VolumeMount field is spelled "readOnly"; the lowercase "readonly" used
  # previously is not a recognised field.
  extraVolumeMounts:
    - name: custom-scenarios
      mountPath: /etc/crowdsec/scenarios/http-403-abuse.yaml
      subPath: "http-403-abuse.yaml"
      readOnly: true
    - name: custom-scenarios
      mountPath: /etc/crowdsec/scenarios/http-429-abuse.yaml
      subPath: "http-429-abuse.yaml"
      readOnly: true
    - name: whitelist
      mountPath: /etc/crowdsec/parsers/s02-enrich/whitelist.yaml
      subPath: "whitelist.yaml"
      readOnly: true
    - name: syslog-acquisition
      mountPath: /etc/crowdsec/acquis.d/syslog.yaml
      subPath: "syslog.yaml"
      readOnly: true
  extraVolumes:
    - name: custom-scenarios
      configMap:
        name: crowdsec-custom-scenarios
    - name: whitelist
      configMap:
        name: crowdsec-whitelist
    - name: syslog-acquisition
      configMap:
        name: crowdsec-syslog-acquisition
lapi:
resources:
requests:
cpu: 25m
memory: 128Mi
limits:
memory: 1Gi
startupProbe:
httpGet:
path: /health
port: 8080
failureThreshold: 30
periodSeconds: 10
priorityClassName: "tier-1-cluster"
replicas: 3
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app.kubernetes.io/name: crowdsec
type: lapi
pdb:
enabled: true
maxUnavailable: 1
extraSecrets:
dbPassword: "${DB_PASSWORD}"
storeCAPICredentialsInSecret: true
persistentVolume:
config:
enabled: false
data:
enabled: false
env:
# CAPI re-enabled 2026-05-24 with image.tag pin to v1.7.8 (see top of
# file). The crashloop earlier today was triggered when TF apply
# reverted the running image v1.7.8 → v1.7.3 (chart 0.21.0 default
# appVersion); v1.7.3 has a CAPI watcher-auth bug against the
# current api.crowdsec.net behaviour. v1.7.8 ran cleanly for 8 days
# (May 16 → May 24) before the revert and authenticates fine.
#
# ENROLL_KEY env intentionally not set — the existing key
# `cmey5e636…` was already consumed (single-shot per replica) and
# subsequent replicas hit 403 Forbidden on `cscli console enroll`.
# CAPI works WITHOUT console enroll — they're separate flows. To
# restore console reporting at app.crowdsec.net: generate a fresh
# enroll key there (Settings → Engines → Enroll), put it in
# var.enroll_key, restore the ENROLL_KEY/INSTANCE_NAME/TAGS env block.
- name: DB_PASSWORD
valueFrom:
secretKeyRef:
name: crowdsec-lapi-secrets
key: dbPassword
dashboard:
enabled: true
env:
- name: MB_DB_TYPE
value: "postgres"
- name: MB_DB_DBNAME
value: crowdsec_metabase
- name: MB_DB_USER
value: "crowdsec"
- name: MB_DB_PASS
value: "${DB_PASSWORD}"
- name: MB_DB_HOST
value: "${postgresql_host}"
- name: MB_DB_PORT
value: "5432"
- name: MB_EMAIL_SMTP_USERNAME
value: "info@viktorbarzin.me"
- name: MB_EMAIL_FROM_ADDRESS
value: "info@viktorbarzin.me"
- name: MB_EMAIL_SMTP_HOST
value: "mailserver.mailserver.svc.cluster.local"
- name: MB_EMAIL_SMTP_PASSWORD
value: "" # Ignore for now as it's unclear what notifications we can get
- name: MB_EMAIL_SMTP_PORT
value: "587"
- name: MB_EMAIL_SMTP_SECURITY
value: "starttls"
ingress:
enabled: true
annotations:
nginx.ingress.kubernetes.io/backend-protocol: "HTTP"
#nginx.ingress.kubernetes.io/auth-url: "https://oauth2.viktorbarzin.me/oauth2/auth"
nginx.ingress.kubernetes.io/auth-url: "http://ak-outpost-authentik-embedded-outpost.authentik.svc.cluster.local:9000/outpost.goauthentik.io/auth/nginx"
# nginx.ingress.kubernetes.io/auth-signin: "https://oauth2.viktorbarzin.me/oauth2/start?rd=/redirect/$http_host$escaped_request_uri"
nginx.ingress.kubernetes.io/auth-signin: "https://authentik.viktorbarzin.me/outpost.goauthentik.io/start?rd=$scheme%3A%2F%2F$host$escaped_request_uri"
nginx.ingress.kubernetes.io/auth-response-headers: "Set-Cookie,X-authentik-username,X-authentik-groups,X-authentik-email,X-authentik-name,X-authentik-uid"
nginx.ingress.kubernetes.io/auth-snippet: "proxy_set_header X-Forwarded-Host $http_host;"
gethomepage.dev/enabled: "true"
gethomepage.dev/description: "Web Application Firewall"
gethomepage.dev/icon: "crowdsec.png"
gethomepage.dev/name: "CrowdSec"
gethomepage.dev/group: "Identity & Security"
gethomepage.dev/widget.type: "crowdsec"
gethomepage.dev/widget.url: "http://crowdsec-service.crowdsec.svc.cluster.local:8080"
gethomepage.dev/widget.username: "${homepage_username}"
gethomepage.dev/widget.password: "${homepage_password}"
gethomepage.dev/pod-selector: ""
ingressClassName: "nginx"
host: "crowdsec.viktorbarzin.me"
tls:
- hosts:
- crowdsec.viktorbarzin.me
secretName: "tls-secret"
metrics:
enabled: true
strategy:
type: RollingUpdate
config:
# Custom profiles: captcha for rate limiting, ban for attacks
profiles.yaml: |
# Captcha for rate limiting and 403 abuse (user can unblock themselves)
name: captcha_remediation
filters:
- Alert.Remediation == true && Alert.GetScope() == "Ip" && Alert.GetScenario() in ["crowdsecurity/http-429-abuse", "crowdsecurity/http-403-abuse", "crowdsecurity/http-crawl-non_statics", "crowdsecurity/http-sensitive-files"]
decisions:
- type: captcha
duration: 4h
notifications:
- slack_alerts
on_success: break
---
# Default: Ban for serious attacks (CVE exploits, scanners, brute force)
name: default_ip_remediation
filters:
- Alert.Remediation == true && Alert.GetScope() == "Ip"
decisions:
- type: ban
duration: 4h
notifications:
- slack_alerts
on_success: break
---
name: default_range_remediation
filters:
- Alert.Remediation == true && Alert.GetScope() == "Range"
decisions:
- type: ban
duration: 4h
notifications:
- slack_alerts
on_success: break
config.yaml.local: |
db_config:
type: postgres
user: crowdsec
password: ${DB_PASSWORD}
db_name: crowdsec
host: ${postgresql_host}
port: 5432
flush:
max_items: 10000
max_age: "7d"
bouncers_autodelete:
api_key: "30d"
agents_autodelete:
login_password: "30d"
decision_bulk_size: 2000
api:
server:
auto_registration: # Activate if not using TLS for authentication
enabled: true
token: "$${REGISTRATION_TOKEN}" # /!\ do not change
allowed_ranges: # /!\ adapt to the pod IP ranges used by your cluster
- "127.0.0.1/32"
- "192.168.0.0/16"
- "10.0.0.0/8"
- "172.16.0.0/12"
notifications:
slack.yaml: |
type: slack
name: slack_alerts
log_level: info
format: |
:rotating_light: *CrowdSec Alert*
{{range .}}
*Scenario:* {{.Alert.Scenario}}
*Source IP:* {{.Alert.Source.IP}} ({{.Alert.Source.Cn}})
*Decisions:*
{{range .Alert.Decisions}} - {{.Type}} for {{.Duration}} (scope: {{.Scope}}, value: {{.Value}})
{{end}}
{{end}}
webhook: ${SLACK_WEBHOOK_URL}

1
stacks/crowdsec/secrets Symbolic link
View file

@ -0,0 +1 @@
../../secrets

View file

@ -0,0 +1,8 @@
# Inherit the shared root Terragrunt configuration.
include "root" {
  path = find_in_parent_folders()
}

# Dependency on the infra stack; its outputs are not read (skip_outputs = true).
dependency "infra" {
  skip_outputs = true
  config_path  = "../infra"
}

81
stacks/cyberchef/.terraform.lock.hcl generated Normal file
View file

@ -0,0 +1,81 @@
# This file is maintained automatically by "terraform init".
# Manual edits may be lost in future updates.
provider "registry.terraform.io/cloudflare/cloudflare" {
version = "4.52.7"
constraints = "~> 4.0"
hashes = [
"h1:pPItIWii5oymR+geZB219ROSPuSODPLTlM4S/u8xLvM=",
"zh:0c904ce31a4c6c4a5b3bf7ff1560e77c0cc7e2450c8553ded8e8c90398e1418b",
"zh:36183d310c36373fe4cb936b83c595c6fd3b0a94bc7827f28e5789ccbf59752e",
"zh:556a568a6f0235e8f41647de9e4d3a1e7b1d6502df8b19b54ec441f1c653ea10",
"zh:633ebbd5b0245e75e500ef9be4d9e62288f97e8da3baaa51323892a786d90285",
"zh:6acfe60cf52a65ba8f044f748548d2119e7f4fd7f8ebcb14698960d87c68f529",
"zh:890df766e9b839623b1f0437355032a3c006226a6c200cd911e15ee1a9014e9f",
"zh:904acc31ebb9d6ef68c792074b30532ee61bf515f19e0a3c75b46f126cca1f13",
"zh:a1d0a81246afc8750286d3f6fe7a8fbe6460dd2662407b28dbfbabb612e5fa9d",
"zh:a41a36fe253fc365fe2b7ffc749624688b2693b4634862fda161179ab100029f",
"zh:a7ef269e77ffa8715c8945a2c14322c7ff159ea44c15f62505f3cbb2cae3b32d",
"zh:b01aa3bed30610633b762df64332b26f8844a68c3960cebcb30f04918efc67fe",
"zh:b069cc2cd18cae10757df3ae030508eac8d55de7e49eda7a5e3e11f2f7fe6455",
"zh:b2d2c6313729ebb7465dceece374049e2d08bda34473901be9ff46a8836d42b2",
"zh:db0e114edaf4bc2f3d4769958807c83022bfbc619a00bdf4c4bd17faa4ab2d8b",
"zh:ecc0aa8b9044f664fd2aaf8fa992d976578f78478980555b4b8f6148e8d1a5fe",
]
}
provider "registry.terraform.io/goauthentik/authentik" {
version = "2024.12.1"
constraints = "~> 2024.10"
hashes = [
"h1:roBMd+gi+TGgikH/bMzEI8JfvJiMAQWt+8FmokCrQIs=",
]
}
provider "registry.terraform.io/hashicorp/helm" {
version = "3.1.1"
hashes = [
"h1:47CqNwkxctJtL/N/JuEj+8QMg8mRNI/NWeKO5/ydfZU=",
"h1:5b2ojWKT0noujHiweCds37ZreRFRQLNaErdJLusJN88=",
"zh:1a6d5ce931708aec29d1f3d9e360c2a0c35ba5a54d03eeaff0ce3ca597cd0275",
"zh:3411919ba2a5941801e677f0fea08bdd0ae22ba3c9ce3309f55554699e06524a",
"zh:81b36138b8f2320dc7f877b50f9e38f4bc614affe68de885d322629dd0d16a29",
"zh:95a2a0a497a6082ee06f95b38bd0f0d6924a65722892a856cfd914c0d117f104",
"zh:9d3e78c2d1bb46508b972210ad706dd8c8b106f8b206ecf096cd211c54f46990",
"zh:a79139abf687387a6efdbbb04289a0a8e7eaca2bd91cdc0ce68ea4f3286c2c34",
"zh:aaa8784be125fbd50c48d84d6e171d3fb6ef84a221dbc5165c067ce05faab4c8",
"zh:afecd301f469975c9d8f350cc482fe656e082b6ab0f677d1a816c3c615837cc1",
"zh:c54c22b18d48ff9053d899d178d9ffef7d9d19785d9bf310a07d648b7aac075b",
"zh:db2eefd55aea48e73384a555c72bac3f7d428e24147bedb64e1a039398e5b903",
"zh:ee61666a233533fd2be971091cecc01650561f1585783c381b6f6e8a390198a4",
"zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c",
]
}
provider "registry.terraform.io/hashicorp/kubernetes" {
version = "3.1.0"
hashes = [
"h1:oodIAuFMikXNmEtil5MQgP4dfSctUBYQiGJfjbsF3NY=",
]
}
provider "registry.terraform.io/hashicorp/vault" {
version = "4.8.0"
constraints = "~> 4.0"
hashes = [
"h1:GPfhH6dr1LY0foPBDYv9bEGifx7eSwYqFcEAOWOUxLk=",
"h1:aHqgWQhDBMeZO9iUKwJYMlh4q+xNMUlMIcjRbF4d02Y=",
"zh:269ab13433f67684012ae7e15876532b0312f5d0d2002a9cf9febb1279ce5ea6",
"zh:4babc95bf0c40eb85005db1dc2ca403c46be4a71dd3e409db3711a56f7a5ca0e",
"zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3",
"zh:86e27c1c625ecc24446a11eeffc3ac319b36c2b4e51251db8579256a0dbcf136",
"zh:a32f31da94824009e26b077374440b52098aecb93c92ff55dc3d31dd37c4ea25",
"zh:be0a18c6c0425518bab4fbffd82078b82036a88503b5d76064de551c9f646cbf",
"zh:be5a77fdfd36863ebeec79cd12b1d13322ffad6821d157a0b279789fa06b5937",
"zh:be8317d142a3caad74c7d936039ae27076a1b2b8312ef5208e2871a5f525977c",
"zh:c94a84895a3d9954b80e983eed4603330a5cdbbd8eef5b3c99278c2d1402ef3c",
"zh:de1fb712784dd8415f011ca5346a34f87fab6046c730557615247e511dbc7d98",
"zh:e3eafae7da550f86cae395d6660b2a0e93ec8d2b0e0e5ef982ec762e961fc952",
"zh:ff35fb1ab6add288f0f368981e56f780b50405accd1937131cba1137999c8d83",
]
}

155
stacks/cyberchef/main.tf Normal file
View file

@ -0,0 +1,155 @@
# Name of the TLS secret, forwarded to the tls_secret and ingress modules.
variable "tls_secret_name" {
  sensitive = true
  type      = string
}
# Dedicated namespace for CyberChef.
resource "kubernetes_namespace" "cyberchef" {
  metadata {
    name = "cyberchef"

    labels = {
      "keel.sh/enrolled" = "true"
      tier               = local.tiers.aux
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: the goldilocks-vpa-auto-mode ClusterPolicy stamps this
    # label on every namespace, so it must not be reported as drift.
    ignore_changes = [
      metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"],
    ]
  }
}
# Invokes the shared setup_tls_secret module for the cyberchef namespace.
module "tls_secret" {
  source = "../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.cyberchef.metadata[0].name
}
# Single-replica CyberChef web application listening on container port 8000.
resource "kubernetes_deployment" "cyberchef" {
  metadata {
    name      = "cyberchef"
    namespace = kubernetes_namespace.cyberchef.metadata[0].name

    annotations = {
      "reloader.stakater.com/search" = "true"
    }

    labels = {
      app  = "cyberchef"
      tier = local.tiers.aux
    }
  }

  spec {
    replicas = 1

    selector {
      match_labels = {
        app = "cyberchef"
      }
    }

    strategy {
      type = "RollingUpdate"
    }

    template {
      metadata {
        labels = {
          app = "cyberchef"
        }

        annotations = {
          "diun.enable"       = "true"
          "diun.include_tags" = "^v\\d+\\.\\d+\\.\\d+$"
        }
      }

      spec {
        container {
          name  = "cyberchef"
          image = "mpepping/cyberchef:v9.55.0"

          port {
            container_port = 8000
          }

          # Memory request equals the memory limit; no CPU limit is set.
          resources {
            limits = {
              memory = "64Mi"
            }

            requests = {
              cpu    = "10m"
              memory = "64Mi"
            }
          }
        }
      }
    }
  }

  # Fields written by in-cluster controllers; ignored so they are not reverted.
  lifecycle {
    ignore_changes = [
      spec[0].template[0].spec[0].dns_config, # KYVERNO_LIFECYCLE_V1
      metadata[0].annotations["keel.sh/policy"],
      metadata[0].annotations["keel.sh/trigger"],
      metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2
      metadata[0].annotations["keel.sh/match-tag"],
      spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE Keel manages tag updates
      metadata[0].annotations["kubernetes.io/change-cause"],
      metadata[0].annotations["deployment.kubernetes.io/revision"],
      spec[0].template[0].metadata[0].annotations["keel.sh/update-time"], # KEEL_LIFECYCLE_V1
    ]
  }
}
# Service "cc" in front of the cyberchef pods: port 80 forwards to container port 8000.
resource "kubernetes_service" "cyberchef" {
  metadata {
    name      = "cc"
    namespace = kubernetes_namespace.cyberchef.metadata[0].name

    labels = {
      "app" = "cyberchef"
    }
  }

  spec {
    port {
      name        = "http"
      port        = 80
      target_port = 8000
    }

    selector = {
      app = "cyberchef"
    }
  }
}
# Anubis instance placed in front of the "cc" service; target_url is the
# service's in-cluster DNS name.
module "anubis" {
  source = "../../modules/kubernetes/anubis_instance"

  name      = "cc"
  namespace = kubernetes_namespace.cyberchef.metadata[0].name

  shared_store_url = "redis://redis-master.redis.svc.cluster.local:6379/5"
  target_url       = "http://${kubernetes_service.cyberchef.metadata[0].name}.${kubernetes_namespace.cyberchef.metadata[0].name}.svc.cluster.local"
}
# Ingress for CyberChef; traffic is routed to the Anubis service rather than
# directly to the "cc" service.
module "ingress" {
  source = "../../modules/kubernetes/ingress_factory"

  name      = "cc"
  namespace = kubernetes_namespace.cyberchef.metadata[0].name

  # Backend: the service name and port exported by the anubis module
  service_name = module.anubis.service_name
  port         = module.anubis.service_port

  auth              = "none" # Anubis-fronted; PoW challenge gates bots, no Authentik
  anti_ai_scraping  = false
  dns_type          = "proxied"
  extra_middlewares = ["traefik-x402@kubernetescrd"]
  tls_secret_name   = var.tls_secret_name

  # Homepage dashboard entry
  extra_annotations = {
    "gethomepage.dev/enabled"      = "true"
    "gethomepage.dev/name"         = "CyberChef"
    "gethomepage.dev/description"  = "Data transformation toolkit"
    "gethomepage.dev/icon"         = "cyberchef.png"
    "gethomepage.dev/group"        = "Development & CI"
    "gethomepage.dev/pod-selector" = ""
  }
}
# CI retrigger 2026-05-16T13:42:57+00:00 bulk enrollment apply (pipeline #689 killed)
# CI retrigger v2 2026-05-16T13:46:35+00:00
# CI retrigger v3 2026-05-16T14:06:39Z
# CI retrigger v4 2026-05-16T14:13:59Z
# CI retrigger v5 2026-05-16T23:10:38Z
# CI retrigger v6 2026-05-16T23:18:58Z

View file

@ -0,0 +1,37 @@
# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa
terraform {
required_providers {
vault = {
source = "hashicorp/vault"
version = "~> 4.0"
}
cloudflare = {
source = "cloudflare/cloudflare"
version = "~> 4"
}
authentik = {
source = "goauthentik/authentik"
version = "~> 2024.10"
}
}
}
variable "kube_config_path" {
type = string
default = "~/.kube/config"
}
provider "kubernetes" {
config_path = var.kube_config_path
}
provider "helm" {
kubernetes = {
config_path = var.kube_config_path
}
}
provider "vault" {
address = "https://vault.viktorbarzin.me"
skip_child_token = true
}

1
stacks/cyberchef/secrets Symbolic link
View file

@ -0,0 +1 @@
../../secrets

View file

@ -0,0 +1,8 @@
# Pull in the shared Terragrunt configuration located by walking up the
# parent folders from this stack.
include "root" {
  path = find_in_parent_folders()
}
# Dependency on the sibling "platform" stack. skip_outputs = true means none
# of its outputs are read here; the block only declares the relationship.
dependency "platform" {
  skip_outputs = true
  config_path  = "../platform"
}

73
stacks/dashy/.terraform.lock.hcl generated Normal file
View file

@ -0,0 +1,73 @@
# This file is maintained automatically by "terraform init".
# Manual edits may be lost in future updates.
provider "registry.terraform.io/cloudflare/cloudflare" {
version = "4.52.7"
constraints = "~> 4.0"
hashes = [
"h1:pPItIWii5oymR+geZB219ROSPuSODPLTlM4S/u8xLvM=",
"zh:0c904ce31a4c6c4a5b3bf7ff1560e77c0cc7e2450c8553ded8e8c90398e1418b",
"zh:36183d310c36373fe4cb936b83c595c6fd3b0a94bc7827f28e5789ccbf59752e",
"zh:556a568a6f0235e8f41647de9e4d3a1e7b1d6502df8b19b54ec441f1c653ea10",
"zh:633ebbd5b0245e75e500ef9be4d9e62288f97e8da3baaa51323892a786d90285",
"zh:6acfe60cf52a65ba8f044f748548d2119e7f4fd7f8ebcb14698960d87c68f529",
"zh:890df766e9b839623b1f0437355032a3c006226a6c200cd911e15ee1a9014e9f",
"zh:904acc31ebb9d6ef68c792074b30532ee61bf515f19e0a3c75b46f126cca1f13",
"zh:a1d0a81246afc8750286d3f6fe7a8fbe6460dd2662407b28dbfbabb612e5fa9d",
"zh:a41a36fe253fc365fe2b7ffc749624688b2693b4634862fda161179ab100029f",
"zh:a7ef269e77ffa8715c8945a2c14322c7ff159ea44c15f62505f3cbb2cae3b32d",
"zh:b01aa3bed30610633b762df64332b26f8844a68c3960cebcb30f04918efc67fe",
"zh:b069cc2cd18cae10757df3ae030508eac8d55de7e49eda7a5e3e11f2f7fe6455",
"zh:b2d2c6313729ebb7465dceece374049e2d08bda34473901be9ff46a8836d42b2",
"zh:db0e114edaf4bc2f3d4769958807c83022bfbc619a00bdf4c4bd17faa4ab2d8b",
"zh:ecc0aa8b9044f664fd2aaf8fa992d976578f78478980555b4b8f6148e8d1a5fe",
]
}
provider "registry.terraform.io/hashicorp/helm" {
version = "3.1.1"
hashes = [
"h1:47CqNwkxctJtL/N/JuEj+8QMg8mRNI/NWeKO5/ydfZU=",
"h1:5b2ojWKT0noujHiweCds37ZreRFRQLNaErdJLusJN88=",
"zh:1a6d5ce931708aec29d1f3d9e360c2a0c35ba5a54d03eeaff0ce3ca597cd0275",
"zh:3411919ba2a5941801e677f0fea08bdd0ae22ba3c9ce3309f55554699e06524a",
"zh:81b36138b8f2320dc7f877b50f9e38f4bc614affe68de885d322629dd0d16a29",
"zh:95a2a0a497a6082ee06f95b38bd0f0d6924a65722892a856cfd914c0d117f104",
"zh:9d3e78c2d1bb46508b972210ad706dd8c8b106f8b206ecf096cd211c54f46990",
"zh:a79139abf687387a6efdbbb04289a0a8e7eaca2bd91cdc0ce68ea4f3286c2c34",
"zh:aaa8784be125fbd50c48d84d6e171d3fb6ef84a221dbc5165c067ce05faab4c8",
"zh:afecd301f469975c9d8f350cc482fe656e082b6ab0f677d1a816c3c615837cc1",
"zh:c54c22b18d48ff9053d899d178d9ffef7d9d19785d9bf310a07d648b7aac075b",
"zh:db2eefd55aea48e73384a555c72bac3f7d428e24147bedb64e1a039398e5b903",
"zh:ee61666a233533fd2be971091cecc01650561f1585783c381b6f6e8a390198a4",
"zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c",
]
}
provider "registry.terraform.io/hashicorp/kubernetes" {
version = "3.1.0"
hashes = [
"h1:oodIAuFMikXNmEtil5MQgP4dfSctUBYQiGJfjbsF3NY=",
]
}
provider "registry.terraform.io/hashicorp/vault" {
version = "4.8.0"
constraints = "~> 4.0"
hashes = [
"h1:GPfhH6dr1LY0foPBDYv9bEGifx7eSwYqFcEAOWOUxLk=",
"h1:aHqgWQhDBMeZO9iUKwJYMlh4q+xNMUlMIcjRbF4d02Y=",
"zh:269ab13433f67684012ae7e15876532b0312f5d0d2002a9cf9febb1279ce5ea6",
"zh:4babc95bf0c40eb85005db1dc2ca403c46be4a71dd3e409db3711a56f7a5ca0e",
"zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3",
"zh:86e27c1c625ecc24446a11eeffc3ac319b36c2b4e51251db8579256a0dbcf136",
"zh:a32f31da94824009e26b077374440b52098aecb93c92ff55dc3d31dd37c4ea25",
"zh:be0a18c6c0425518bab4fbffd82078b82036a88503b5d76064de551c9f646cbf",
"zh:be5a77fdfd36863ebeec79cd12b1d13322ffad6821d157a0b279789fa06b5937",
"zh:be8317d142a3caad74c7d936039ae27076a1b2b8312ef5208e2871a5f525977c",
"zh:c94a84895a3d9954b80e983eed4603330a5cdbbd8eef5b3c99278c2d1402ef3c",
"zh:de1fb712784dd8415f011ca5346a34f87fab6046c730557615247e511dbc7d98",
"zh:e3eafae7da550f86cae395d6660b2a0e93ec8d2b0e0e5ef982ec762e961fc952",
"zh:ff35fb1ab6add288f0f368981e56f780b50405accd1937131cba1137999c8d83",
]
}

425
stacks/dashy/conf.yml Normal file
View file

@ -0,0 +1,425 @@
pageInfo:
title: Dashy
description: Welcome to your new dashboard!
navLinks:
- title: GitHub
path: https://github.com/Lissy93/dashy
- title: Documentation
path: https://dashy.to/docs
appConfig:
theme: material
layout: auto
iconSize: large
sections:
- name: Infra
icon: si-databricks
displayData:
sortBy: alphabetical
rows: 2
cols: 2
collapsed: false
hideForGuests: false
items:
- &ref_0
title: ESXi R730 (Server)
description: R730 esxi UI
icon: si-vmware
url: https://esxi.viktorbarzin.me/ui/#/login
target: newtab
id: 0_496_esxirserver
- &ref_1
title: PFsense (Firewall)
description: Firewall
icon: si-pfsense
url: https://pfsense.viktorbarzin.me
target: newtab
id: 1_496_pfsensefirewall
- &ref_2
title: iDRAC
description: ""
icon: si-dell
url: https://idrac.viktorbarzin.me/
target: newtab
id: 2_496_idrac
- &ref_3
title: TP-Link Gateway Router
icon: hl-asus-router
url: https://gw.viktorbarzin.me/webpages/login.html
id: 3_496_tplinkgatewayrouter
- &ref_4
title: Home Assistant London
description: Home Assistant London Deployment
icon: si-homeassistant
url: http://ha-london.viktorbarzin.me/
target: newtab
id: 4_496_homeassistantlondon
- &ref_5
title: NAS
description: ""
icon: si-synology
url: https://nas.viktorbarzin.me/
id: 5_496_nas
- &ref_6
title: Server Switch
description: TP-Link Extension Switch
icon: 🔀
url: http://192.168.1.6/
target: newtab
id: 6_496_serverswitch
- &ref_7
title: Home Assistant Sofia
description: Home Assistant Sofia Deployment
icon: si-homeassistant
url: http://ha-sofia.viktorbarzin.me/
target: newtab
id: 7_496_homeassistantsofia
- &ref_8
title: IP Cameras
description: Frigate
icon: si-protodotio
url: https://frigate.viktorbarzin.me
target: newtab
id: 8_496_ipcameras
filteredItems:
- *ref_0
- *ref_1
- *ref_2
- *ref_3
- *ref_4
- *ref_5
- *ref_6
- *ref_7
- *ref_8
- name: Valchedrym Infra
displayData:
sortBy: default
rows: 2
cols: 2
collapsed: false
hideForGuests: false
items:
- &ref_9
title: Valchedrym OpenWRT
icon: si-openwrt
url: https://valchedrym.viktorbarzin.me/
target: newtab
id: 0_1567_valchedrymopenwrt
- &ref_10
title: Valchedram Video System
icon: 📷
url: http://valchedrym-video.viktorbarzin.me:5080/
target: newtab
id: 1_1567_valchedramvideosystem
- &ref_11
title: Mladost 3 Router
icon: si-ghostery
url: https://mladost3.viktorbarzin.me/
target: newtab
id: 2_1567_mladostrouter
- &ref_12
title: Valchedrym Services Uptime
description: Uptime Dashboard for Valchedrym Services
icon: si-openwrt
url: https://uptime.viktorbarzin.me/status/valchedrym
target: newtab
id: 3_1567_valchedrymservicesuptime
icon: 🐶
filteredItems:
- *ref_9
- *ref_10
- *ref_11
- *ref_12
- name: Monitoring
icon: hl-grafana
displayData:
sortBy: alphabetical
rows: 3
collapsed: false
hideForGuests: false
cols: 2
items:
- &ref_13
title: Uptime Kuma
description: Internal Uptime Monitoring
icon: si-uptimekuma
url: https://uptime.viktorbarzin.me/status/cluster-internal
target: newtab
id: 0_1062_uptimekuma
- &ref_14
title: iDRAC Grafana
icon: si-dell
url: https://grafana.viktorbarzin.me/d/O19gr0jZk/idrac-host-stats
target: newtab
statusCheckAcceptCodes: "400"
id: 1_1062_idracgrafana
- &ref_15
title: Kubernetes Cluster Nodes
description: Kubernetes Nodes Stats
icon: hl-kubernetes
url: https://grafana.viktorbarzin.me/d/xfpJB9FGz/node-exporter?orgId=1
target: newtab
statusCheckAcceptCodes: "400"
id: 2_1062_kubernetesclusternodes
- &ref_16
title: OpenWRT (London)
icon: si-openwrt
url: https://grafana.viktorbarzin.me/d/fLi0yXAWk/openwrt?orgId=1
target: newtab
statusCheckAcceptCodes: "400"
id: 3_1062_openwrtlondon
- &ref_17
title: Prometheus
icon: si-prometheus
url: https://prometheus.viktorbarzin.me/
statusCheck: false
statusCheckAcceptCodes: "400"
id: 4_1062_prometheus
- &ref_18
title: Alert Manager
icon: si-protractor
url: https://alertmanager.viktorbarzin.me/
target: newtab
id: 5_1062_alertmanager
- &ref_19
title: External Monitoring
description: Hetrix report
icon: si-amp
url: https://wl.hetrixtools.com/r/38981b548b5d38b052aca8d01285a3f3/
target: modal
id: 6_1062_externalmonitoring
- &ref_20
title: K8S Dashboard
description: Kubernetes dashboard with view of all nodes, pods etc
icon: si-kubernetes
url: https://k8s.viktorbarzin.me/#/node
id: 7_1062_ksdashboard
filteredItems:
- *ref_13
- *ref_14
- *ref_15
- *ref_16
- *ref_17
- *ref_18
- *ref_19
- *ref_20
- name: Infra Services
displayData:
sortBy: default
rows: 3
cols: 2
collapsed: false
hideForGuests: false
items:
- &ref_21
title: PhpMyAdmin
description: Admin UI for the DB Cluster
icon: si-phpmyadmin
url: https://pma.viktorbarzin.me/index.php
displayData: ttt
target: newtab
statusCheck: false
id: 0_1364_phpmyadmin
- &ref_22
title: Woodpecker CI
description: CI/CD Service
icon: si-woodpeckerci
url: https://ci.viktorbarzin.me/
target: newtab
id: 1_1364_woodpeckerci
- &ref_23
title: DNS Server
description: Technitium
icon: hl-azure-dns
url: https://technitium.viktorbarzin.me/
target: newtab
statusCheck: false
statusCheckAcceptCodes: "400"
id: 2_1364_dnsserver
- &ref_24
title: Headscale (VPN) UI
icon: si-wireguard
url: https://headscale.viktorbarzin.me/manager
target: newtab
statusCheck: false
statusCheckAcceptCodes: "400"
id: 3_1364_headscalevpnui
- &ref_25
title: URL Shortener
description: Shlink
icon: si-curl
url: https://shlink.viktorbarzin.me
statusCheck: false
statusCheckAcceptCodes: "400"
id: 4_1364_urlshorterner
- &ref_26
title: Crowdsec Dashboard
icon: si-crowdsource
url: >-
https://crowdsec.viktorbarzin.me/public/dashboard/8f6226be-d4dc-45f1-bacf-a4584f71dcb0
target: newtab
id: 5_1364_crowdsecdashboard
- &ref_27
title: Redis
description: Redis
icon: si-redis
url: https://redis.viktorbarzin.me/
target: newtab
id: 6_1364_redis
icon: si-adminer
filteredItems:
- *ref_21
- *ref_22
- *ref_23
- *ref_24
- *ref_25
- *ref_26
- *ref_27
- name: Public Services
displayData:
sortBy: alphabetical
rows: 2
cols: 4
collapsed: false
hideForGuests: false
items:
- &ref_29
title: City Guesser
description: Geolocator Game
icon: hl-openmaptiles
url: https://city-guesser.viktorbarzin.me/
target: newtab
statusCheck: false
id: 0_1475_cityguesser
- &ref_30
title: Excalidraw
description: Collaborative Hand Drawing Tool
icon: hl-excalidraw-light
url: https://excalidraw.viktorbarzin.me
target: newtab
statusCheck: false
id: 1_1475_excalidraw
- &ref_31
title: Formula 1 Stream
icon: si-f1
url: http://f1.viktorbarzin.me/
statusCheck: false
id: 2_1475_formulastream
- &ref_32
title: HackMD
description: Collaborative Markdown Document Editing
icon: si-hackclub
url: https://hackmd.viktorbarzin.me/
statusCheck: false
id: 3_1475_hackmd
- &ref_33
title: Activate Windows (KMS)
description: How to activate Windows Machines
icon: si-windows95
url: https://kms.viktorbarzin.me/
statusCheck: false
id: 4_1475_activatewindowskms
- &ref_34
title: PrivateBin
description: E2E Encrypted Pastebin
icon: si-pastebin
url: https://pb.viktorbarzin.me/
statusCheck: false
id: 5_1475_privatebin
- &ref_35
title: Blog
description: Personal Blog
icon: si-rss
url: https://viktorbarzin.me/
statusCheck: false
id: 6_1475_blog
- &ref_36
title: Setup VPN (Tailscale)
description: "URL to set in app config: https://headscale.viktorbarzin.me"
icon: si-wireguard
url: https://github.com/juanfont/headscale/blob/main/docs/iOS-client.md
target: newtab
id: 7_1475_setupvpntailscale
- &ref_37
title: Vaultwarden
description: Self-hosted Bitwarden server (Password Manager)
icon: si-bitwarden
url: https://vaultwarden.viktorbarzin.me
target: newtab
id: 8_1475_vaultwarden
- &ref_38
title: Send
description: Share files
icon: si-libreoffice
url: https://send.viktorbarzin.me/
target: newtab
id: 9_1475_send
- &ref_39
title: Youtube Downloader
icon: si-youtube
url: https://yt.viktorbarzin.me
target: newtab
id: 10_1475_youtubedownloader
- &ref_40
title: Photos
description: Immich
icon: si-immich
url: https://photos.viktorbarzin.me
target: newtab
id: 11_1475_photos
- &ref_41
title: Audiobookshelf
description: >-
Audiobook shelf. For iOS, install app from
https://url.viktorbarzin.me/audiobookshelf
icon: si-audible
url: https://audiobookshelf.viktorbarzin.me/
target: newtab
id: 12_1475_audiobookshelf
- &ref_43
title: Paperless-ngx
description: Document index
icon: hl-paperless-ngx
url: https://pdf.viktorbarzin.me/
target: newtab
id: 14_1475_paperlessngx
icon: si-sublimetext
filteredItems:
- *ref_29
- *ref_30
- *ref_31
- *ref_32
- *ref_33
- *ref_34
- *ref_35
- *ref_36
- *ref_37
- *ref_38
- *ref_39
- *ref_40
- *ref_41
- *ref_43
- name: Under Construction
displayData:
sortBy: alphabetical
rows: 1
cols: 1
collapsed: false
hideForGuests: false
items:
- &ref_44
title: Travel Blog
icon: si-hugo
url: https://travel.viktorbarzin.me/
target: newtab
statusCheck: false
id: 0_1833_travelblog
- &ref_45
title: Personal Finance App
icon: si-abstract
url: https://finance.viktorbarzin.me/transaction
statusCheck: false
id: 1_1833_personalfinanceapp
icon: si-progress
filteredItems:
- *ref_44
- *ref_45

146
stacks/dashy/main.tf Normal file
View file

@ -0,0 +1,146 @@
# Name of the TLS secret; handed to the tls_secret and ingress modules below.
# Declared sensitive so Terraform redacts the value in plan/apply output.
variable "tls_secret_name" {
  sensitive = true
  type      = string
}
# Invokes the shared setup_tls_secret module for the dashy namespace, using
# the secret name supplied in var.tls_secret_name.
module "tls_secret" {
  source          = "../../modules/kubernetes/setup_tls_secret"
  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.dashy.metadata[0].name
}
# Namespace that holds every Dashy resource in this stack.
resource "kubernetes_namespace" "dashy" {
  metadata {
    name = "dashy"

    labels = {
      tier               = local.tiers.aux
      "istio-injection"  = "disabled"
      "keel.sh/enrolled" = "true"
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
    ignore_changes = [
      metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"],
    ]
  }
}
# ConfigMap "config": ships the conf.yml that sits next to this file as the
# single key "conf.yml".
resource "kubernetes_config_map" "config" {
  metadata {
    namespace = kubernetes_namespace.dashy.metadata[0].name
    name      = "config"

    # Reloader "match" marker; the Deployment carries the corresponding
    # "reloader.stakater.com/search" annotation.
    annotations = {
      "reloader.stakater.com/match" = "true"
    }
  }

  data = {
    "conf.yml" = file("${path.module}/conf.yml")
  }
}
# Dashy dashboard Deployment.
#
# Currently scaled to zero (see the replicas comment). The container mounts the
# "config" ConfigMap at /app/user-data/ and listens on port 8080.
resource "kubernetes_deployment" "dashy" {
  metadata {
    name      = "dashy"
    namespace = kubernetes_namespace.dashy.metadata[0].name
    labels = {
      app  = "dashy"
      tier = local.tiers.aux
    }
    annotations = {
      # Reloader "search" marker; the ConfigMap carries the corresponding
      # "reloader.stakater.com/match" annotation.
      "reloader.stakater.com/search" = "true"
    }
  }

  spec {
    # Disabled: reduce cluster memory pressure (2026-03-14 OOM incident)
    replicas = 0

    selector {
      match_labels = {
        app = "dashy"
      }
    }

    template {
      metadata {
        annotations = {
          # "diun.enable" = "true"
        }
        labels = {
          app = "dashy"
        }
      }

      spec {
        container {
          image = "lissy93/dashy:latest"
          name  = "dashy"

          # Request equals limit for memory, so the pod's memory use is capped at 1Gi.
          resources {
            requests = {
              cpu    = "250m"
              memory = "1Gi"
            }
            limits = {
              memory = "1Gi"
            }
          }

          port {
            container_port = 8080
          }

          volume_mount {
            name       = "config"
            mount_path = "/app/user-data/"
          }
        }

        volume {
          name = "config"
          config_map {
            # Reference the resource instead of repeating the literal "config":
            # the value is identical, but Terraform now orders the ConfigMap
            # before the Deployment and a rename cannot silently break the mount.
            name = kubernetes_config_map.config.metadata[0].name
          }
        }
      }
    }
  }

  lifecycle {
    # Fields below are mutated in-cluster (Kyverno, Keel, kubectl/rollout
    # bookkeeping) and must not show up as drift.
    ignore_changes = [
      spec[0].template[0].spec[0].dns_config,         # KYVERNO_LIFECYCLE_V1
      spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE Keel manages tag updates
      metadata[0].annotations["keel.sh/policy"],
      metadata[0].annotations["keel.sh/trigger"],
      metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2
      metadata[0].annotations["keel.sh/match-tag"],
      metadata[0].annotations["kubernetes.io/change-cause"],
      metadata[0].annotations["deployment.kubernetes.io/revision"],
      spec[0].template[0].metadata[0].annotations["keel.sh/update-time"], # KEEL_LIFECYCLE_V1
    ]
  }
}
# Service "dashy": port 80 in front of container port 8080 on pods labelled
# app=dashy.
resource "kubernetes_service" "dashy" {
  metadata {
    namespace = kubernetes_namespace.dashy.metadata[0].name
    name      = "dashy"
    labels    = { app = "dashy" }
  }

  spec {
    selector = { app = "dashy" }

    port {
      name        = "http"
      target_port = 8080
      port        = 80
    }
  }
}
# Ingress for Dashy via the shared ingress_factory module. No service_name or
# port is passed, so the module's defaults apply (not visible from this file).
module "ingress" {
  source          = "../../modules/kubernetes/ingress_factory"
  name            = "dashy"
  namespace       = kubernetes_namespace.dashy.metadata[0].name
  tls_secret_name = var.tls_secret_name
  dns_type        = "proxied"
  auth            = "required" # hidden as we use homepage now
}

33
stacks/dashy/providers.tf Normal file
View file

@ -0,0 +1,33 @@
# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa
terraform {
required_providers {
vault = {
source = "hashicorp/vault"
version = "~> 4.0"
}
cloudflare = {
source = "cloudflare/cloudflare"
version = "~> 4"
}
}
}
variable "kube_config_path" {
type = string
default = "~/.kube/config"
}
provider "kubernetes" {
config_path = var.kube_config_path
}
provider "helm" {
kubernetes = {
config_path = var.kube_config_path
}
}
provider "vault" {
address = "https://vault.viktorbarzin.me"
skip_child_token = true
}

1
stacks/dashy/secrets Symbolic link
View file

@ -0,0 +1 @@
../../secrets

View file

@ -0,0 +1,8 @@
# Pull in the shared Terragrunt configuration located by walking up the
# parent folders from this stack.
include "root" {
  path = find_in_parent_folders()
}
# Dependency on the sibling "platform" stack. skip_outputs = true means none
# of its outputs are read here; the block only declares the relationship.
dependency "platform" {
  skip_outputs = true
  config_path  = "../platform"
}

81
stacks/dawarich/.terraform.lock.hcl generated Normal file
View file

@ -0,0 +1,81 @@
# This file is maintained automatically by "terraform init".
# Manual edits may be lost in future updates.
provider "registry.terraform.io/cloudflare/cloudflare" {
version = "4.52.7"
constraints = "~> 4.0"
hashes = [
"h1:pPItIWii5oymR+geZB219ROSPuSODPLTlM4S/u8xLvM=",
"zh:0c904ce31a4c6c4a5b3bf7ff1560e77c0cc7e2450c8553ded8e8c90398e1418b",
"zh:36183d310c36373fe4cb936b83c595c6fd3b0a94bc7827f28e5789ccbf59752e",
"zh:556a568a6f0235e8f41647de9e4d3a1e7b1d6502df8b19b54ec441f1c653ea10",
"zh:633ebbd5b0245e75e500ef9be4d9e62288f97e8da3baaa51323892a786d90285",
"zh:6acfe60cf52a65ba8f044f748548d2119e7f4fd7f8ebcb14698960d87c68f529",
"zh:890df766e9b839623b1f0437355032a3c006226a6c200cd911e15ee1a9014e9f",
"zh:904acc31ebb9d6ef68c792074b30532ee61bf515f19e0a3c75b46f126cca1f13",
"zh:a1d0a81246afc8750286d3f6fe7a8fbe6460dd2662407b28dbfbabb612e5fa9d",
"zh:a41a36fe253fc365fe2b7ffc749624688b2693b4634862fda161179ab100029f",
"zh:a7ef269e77ffa8715c8945a2c14322c7ff159ea44c15f62505f3cbb2cae3b32d",
"zh:b01aa3bed30610633b762df64332b26f8844a68c3960cebcb30f04918efc67fe",
"zh:b069cc2cd18cae10757df3ae030508eac8d55de7e49eda7a5e3e11f2f7fe6455",
"zh:b2d2c6313729ebb7465dceece374049e2d08bda34473901be9ff46a8836d42b2",
"zh:db0e114edaf4bc2f3d4769958807c83022bfbc619a00bdf4c4bd17faa4ab2d8b",
"zh:ecc0aa8b9044f664fd2aaf8fa992d976578f78478980555b4b8f6148e8d1a5fe",
]
}
provider "registry.terraform.io/goauthentik/authentik" {
version = "2024.12.1"
constraints = "~> 2024.10"
hashes = [
"h1:roBMd+gi+TGgikH/bMzEI8JfvJiMAQWt+8FmokCrQIs=",
]
}
provider "registry.terraform.io/hashicorp/helm" {
version = "3.1.1"
hashes = [
"h1:47CqNwkxctJtL/N/JuEj+8QMg8mRNI/NWeKO5/ydfZU=",
"h1:5b2ojWKT0noujHiweCds37ZreRFRQLNaErdJLusJN88=",
"zh:1a6d5ce931708aec29d1f3d9e360c2a0c35ba5a54d03eeaff0ce3ca597cd0275",
"zh:3411919ba2a5941801e677f0fea08bdd0ae22ba3c9ce3309f55554699e06524a",
"zh:81b36138b8f2320dc7f877b50f9e38f4bc614affe68de885d322629dd0d16a29",
"zh:95a2a0a497a6082ee06f95b38bd0f0d6924a65722892a856cfd914c0d117f104",
"zh:9d3e78c2d1bb46508b972210ad706dd8c8b106f8b206ecf096cd211c54f46990",
"zh:a79139abf687387a6efdbbb04289a0a8e7eaca2bd91cdc0ce68ea4f3286c2c34",
"zh:aaa8784be125fbd50c48d84d6e171d3fb6ef84a221dbc5165c067ce05faab4c8",
"zh:afecd301f469975c9d8f350cc482fe656e082b6ab0f677d1a816c3c615837cc1",
"zh:c54c22b18d48ff9053d899d178d9ffef7d9d19785d9bf310a07d648b7aac075b",
"zh:db2eefd55aea48e73384a555c72bac3f7d428e24147bedb64e1a039398e5b903",
"zh:ee61666a233533fd2be971091cecc01650561f1585783c381b6f6e8a390198a4",
"zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c",
]
}
provider "registry.terraform.io/hashicorp/kubernetes" {
version = "3.1.0"
hashes = [
"h1:oodIAuFMikXNmEtil5MQgP4dfSctUBYQiGJfjbsF3NY=",
]
}
provider "registry.terraform.io/hashicorp/vault" {
version = "4.8.0"
constraints = "~> 4.0"
hashes = [
"h1:GPfhH6dr1LY0foPBDYv9bEGifx7eSwYqFcEAOWOUxLk=",
"h1:aHqgWQhDBMeZO9iUKwJYMlh4q+xNMUlMIcjRbF4d02Y=",
"zh:269ab13433f67684012ae7e15876532b0312f5d0d2002a9cf9febb1279ce5ea6",
"zh:4babc95bf0c40eb85005db1dc2ca403c46be4a71dd3e409db3711a56f7a5ca0e",
"zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3",
"zh:86e27c1c625ecc24446a11eeffc3ac319b36c2b4e51251db8579256a0dbcf136",
"zh:a32f31da94824009e26b077374440b52098aecb93c92ff55dc3d31dd37c4ea25",
"zh:be0a18c6c0425518bab4fbffd82078b82036a88503b5d76064de551c9f646cbf",
"zh:be5a77fdfd36863ebeec79cd12b1d13322ffad6821d157a0b279789fa06b5937",
"zh:be8317d142a3caad74c7d936039ae27076a1b2b8312ef5208e2871a5f525977c",
"zh:c94a84895a3d9954b80e983eed4603330a5cdbbd8eef5b3c99278c2d1402ef3c",
"zh:de1fb712784dd8415f011ca5346a34f87fab6046c730557615247e511dbc7d98",
"zh:e3eafae7da550f86cae395d6660b2a0e93ec8d2b0e0e5ef982ec762e961fc952",
"zh:ff35fb1ab6add288f0f368981e56f780b50405accd1937131cba1137999c8d83",
]
}

534
stacks/dawarich/main.tf Normal file
View file

@ -0,0 +1,534 @@
# Name of the TLS secret; handed to the tls_secret and ingress modules below.
# Declared sensitive so Terraform redacts the value in plan/apply output.
variable "tls_secret_name" {
  sensitive = true
  type      = string
}
# Tag of the freikin/dawarich image used by both containers of the Deployment.
# Only the initial value: the Deployment ignores later image changes because
# Keel manages tag updates.
variable "image_version" {
  default = "1.7.11"
  type    = string
}
# NFS server address. In this file it is referenced only by the commented-out
# photon Deployment.
variable "nfs_server" {
  type = string
}

# Redis host; used to build REDIS_URL (port 6379) for both Dawarich containers.
variable "redis_host" {
  type = string
}

# PostgreSQL host; DATABASE_HOST for both containers and the psql target of the
# ingestion freshness CronJob.
variable "postgresql_host" {
  type = string
}
# Namespace that holds every Dawarich resource in this stack.
resource "kubernetes_namespace" "dawarich" {
  metadata {
    name = "dawarich"
    labels = {
      "istio-injection"  = "disabled"
      tier               = local.tiers.edge
      "keel.sh/enrolled" = "true"
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
    # Without this, each plan proposes removing the label and Kyverno re-adds
    # it afterwards, producing permanent drift on this namespace.
    ignore_changes = [metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"]]
  }
}
# ExternalSecret "dawarich-secrets": extracts the "dawarich" key from the
# vault-kv ClusterSecretStore into a Secret of the same name, refreshed every
# 15 minutes. The Deployment and CronJob read db_password, secret_key_base and
# geoapify_api_key from that Secret.
resource "kubernetes_manifest" "external_secret" {
  # The namespace is written as a literal below, so the ordering has to be explicit.
  depends_on = [kubernetes_namespace.dawarich]

  manifest = {
    apiVersion = "external-secrets.io/v1beta1"
    kind       = "ExternalSecret"

    metadata = {
      namespace = "dawarich"
      name      = "dawarich-secrets"
    }

    spec = {
      refreshInterval = "15m"

      secretStoreRef = {
        kind = "ClusterSecretStore"
        name = "vault-kv"
      }

      target = {
        name = "dawarich-secrets"
      }

      dataFrom = [
        {
          extract = {
            key = "dawarich"
          }
        },
      ]
    }
  }
}
# Invokes the shared setup_tls_secret module for the dawarich namespace, using
# the secret name supplied in var.tls_secret_name.
module "tls_secret" {
  source          = "../../modules/kubernetes/setup_tls_secret"
  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.dawarich.metadata[0].name
}
# Dawarich Deployment: one pod with two containers built from the same image.
#   * "dawarich"         — Rails web server on port 3000
#   * "dawarich-sidekiq" — Sidekiq background worker
# Both talk to the shared PostgreSQL and Redis hosts passed in as variables and
# read credentials from the "dawarich-secrets" Secret.
resource "kubernetes_deployment" "dawarich" {
  metadata {
    namespace = kubernetes_namespace.dawarich.metadata[0].name
    name      = "dawarich"

    labels = {
      tier = local.tiers.edge
      app  = "dawarich"
    }

    annotations = {
      "reloader.stakater.com/search" = "true"
    }
  }

  spec {
    replicas = 1

    # Recreate: the old pod is torn down before the new one starts.
    strategy {
      type = "Recreate"
    }

    selector {
      match_labels = {
        app = "dawarich"
      }
    }

    template {
      metadata {
        labels = {
          app = "dawarich"
        }

        annotations = {
          "dependency.kyverno.io/wait-for" = "postgresql.dbaas:5432,redis-master.redis:6379"
          "diun.enable"                    = "true"
          # Only plain x.y.z tags, with an optional leading "v".
          "diun.include_tags" = "^v?\\d+\\.\\d+\\.\\d+$"
        }
      }

      spec {
        termination_grace_period_seconds = 60

        # ---------------------------------------------------------------
        # Web container
        # ---------------------------------------------------------------
        container {
          name    = "dawarich"
          image   = "freikin/dawarich:${var.image_version}"
          command = ["web-entrypoint.sh"]
          args    = ["bin/rails", "server", "-p", "3000", "-b", "::"]

          port {
            name           = "http"
            container_port = 3000
          }
          port {
            name           = "prometheus"
            container_port = 9394
          }

          # Datastores
          env {
            name  = "REDIS_URL"
            value = "redis://${var.redis_host}:6379"
          }
          env {
            name  = "DATABASE_HOST"
            value = var.postgresql_host
          }
          env {
            name  = "DATABASE_USERNAME"
            value = "dawarich"
          }
          env {
            name = "DATABASE_PASSWORD"
            value_from {
              secret_key_ref {
                name = "dawarich-secrets"
                key  = "db_password"
              }
            }
          }
          env {
            name  = "DATABASE_NAME"
            value = "dawarich"
          }

          # Application settings
          env {
            name  = "MIN_MINUTES_SPENT_IN_CITY"
            value = "60"
          }
          env {
            name  = "TIME_ZONE"
            value = "Europe/London"
          }
          env {
            name  = "DISTANCE_UNIT"
            value = "km"
          }
          env {
            name  = "ENABLE_TELEMETRY"
            value = "true"
          }
          env {
            name  = "APPLICATION_HOSTS"
            value = "dawarich.viktorbarzin.me"
          }
          # env {
          #   name  = "PROMETHEUS_EXPORTER_ENABLED"
          #   value = "true"
          # }
          # env {
          #   name  = "PROMETHEUS_EXPORTER_PORT"
          #   value = "9394"
          # }
          # env {
          #   name  = "PROMETHEUS_EXPORTER_HOST"
          #   value = "0.0.0.0"
          # }
          env {
            name  = "RAILS_ENV"
            value = "production"
          }
          env {
            name = "SECRET_KEY_BASE"
            value_from {
              secret_key_ref {
                name = "dawarich-secrets"
                key  = "secret_key_base"
              }
            }
          }
          env {
            name  = "RAILS_LOG_TO_STDOUT"
            value = "true"
          }
          env {
            name  = "SELF_HOSTED"
            value = "true"
          }
          # env {
          #   name  = "PHOTON_API_HOST"
          #   value = "photon.dawarich"
          # }
          # volume_mount {
          #   name       = "watched"
          #   mount_path = "/var/app/tmp/imports/watched"
          # }

          resources {
            limits = {
              memory = "896Mi"
            }
            requests = {
              cpu    = "15m"
              memory = "896Mi"
            }
          }
        }

        # ---------------------------------------------------------------
        # Sidekiq worker container
        # ---------------------------------------------------------------
        container {
          name    = "dawarich-sidekiq"
          image   = "freikin/dawarich:${var.image_version}"
          command = ["sidekiq-entrypoint.sh"]
          args    = ["bundle exec sidekiq"]

          # Datastores
          env {
            name  = "REDIS_URL"
            value = "redis://${var.redis_host}:6379"
          }
          env {
            name  = "DATABASE_HOST"
            value = var.postgresql_host
          }
          env {
            name  = "DATABASE_USERNAME"
            value = "dawarich"
          }
          env {
            name = "DATABASE_PASSWORD"
            value_from {
              secret_key_ref {
                name = "dawarich-secrets"
                key  = "db_password"
              }
            }
          }
          env {
            name  = "DATABASE_NAME"
            value = "dawarich"
          }

          # Application settings
          env {
            name  = "MIN_MINUTES_SPENT_IN_CITY"
            value = "60"
          }
          env {
            name  = "TIME_ZONE"
            value = "Europe/London"
          }
          env {
            name  = "DISTANCE_UNIT"
            value = "km"
          }
          env {
            name  = "BACKGROUND_PROCESSING_CONCURRENCY"
            value = "2"
          }
          env {
            name  = "ENABLE_TELEMETRY"
            value = "true"
          }
          env {
            name  = "APPLICATION_HOSTS"
            value = "dawarich.viktorbarzin.me"
          }
          # Prometheus exporter disabled until a standalone `prometheus_exporter`
          # server sidecar is added — see follow-up bead. The client middleware
          # pushes over TCP to PROMETHEUS_EXPORTER_HOST:PORT; it does not start
          # a listener itself. Keeping ENABLED=false silences the reconnect
          # log spam (~2/sec) from PrometheusExporter::Client.
          env {
            name  = "PROMETHEUS_EXPORTER_ENABLED"
            value = "false"
          }
          env {
            name  = "RAILS_ENV"
            value = "production"
          }
          env {
            name = "SECRET_KEY_BASE"
            value_from {
              secret_key_ref {
                name = "dawarich-secrets"
                key  = "secret_key_base"
              }
            }
          }
          env {
            name  = "RAILS_LOG_TO_STDOUT"
            value = "true"
          }
          env {
            name  = "SELF_HOSTED"
            value = "true"
          }
          env {
            name = "GEOAPIFY_API_KEY"
            value_from {
              secret_key_ref {
                name = "dawarich-secrets"
                key  = "geoapify_api_key"
              }
            }
          }

          resources {
            limits = {
              memory = "1Gi"
            }
            requests = {
              cpu    = "50m"
              memory = "768Mi"
            }
          }

          # Both probes only check that a process matching
          # 'bundle exec sidekiq' exists inside the container.
          liveness_probe {
            initial_delay_seconds = 90
            period_seconds        = 30
            timeout_seconds       = 5
            failure_threshold     = 3
            exec {
              command = ["/bin/sh", "-c", "pgrep -f 'bundle exec sidekiq' >/dev/null"]
            }
          }
          readiness_probe {
            initial_delay_seconds = 30
            period_seconds        = 15
            timeout_seconds       = 5
            exec {
              command = ["/bin/sh", "-c", "pgrep -f 'bundle exec sidekiq' >/dev/null"]
            }
          }
        }
      }
    }
  }

  lifecycle {
    # Fields below are mutated in-cluster (Kyverno, Keel, kubectl/rollout
    # bookkeeping) and must not show up as drift.
    ignore_changes = [
      spec[0].template[0].spec[0].dns_config,         # KYVERNO_LIFECYCLE_V1
      spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE Keel manages tag updates
      spec[0].template[0].spec[0].container[1].image,
      metadata[0].annotations["keel.sh/policy"],
      metadata[0].annotations["keel.sh/trigger"],
      metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2
      metadata[0].annotations["keel.sh/match-tag"],
      metadata[0].annotations["kubernetes.io/change-cause"],
      metadata[0].annotations["deployment.kubernetes.io/revision"],
      spec[0].template[0].metadata[0].annotations["keel.sh/update-time"], # KEEL_LIFECYCLE_V1
    ]
  }
}
# resource "kubernetes_deployment" "photon" {
# metadata {
# name = "photon"
# namespace = kubernetes_namespace.dawarich.metadata[0].name
# labels = {
# app = "photon"
# }
# }
# spec {
# replicas = 1
# strategy {
# type = "Recreate"
# }
# selector {
# match_labels = {
# app = "photon"
# }
# }
# template {
# metadata {
# labels = {
# app = "photon"
# }
# }
# spec {
# container {
# image = "rtuszik/photon-docker:latest"
# name = "photon"
# port {
# name = "tcp"
# container_port = 2322
# }
# env {
# name = "COUNTRY_CODE"
# value = "bg"
# }
# volume_mount {
# name = "data"
# mount_path = "/photon/photon_data"
# }
# }
# volume {
# name = "data"
# nfs {
# path = "/mnt/main/photon"
# server = var.nfs_server
# }
# }
# }
# }
# }
# }
# Service "dawarich": TCP port 80 in front of the web container's port 3000 on
# pods labelled app=dawarich.
resource "kubernetes_service" "dawarich" {
  metadata {
    namespace = kubernetes_namespace.dawarich.metadata[0].name
    name      = "dawarich"
    labels    = { app = "dawarich" }
  }

  spec {
    selector = { app = "dawarich" }

    port {
      name        = "http"
      protocol    = "TCP"
      target_port = 3000
      port        = 80
    }
  }
}
# resource "kubernetes_service" "photon" {
# metadata {
# name = "photon"
# namespace = kubernetes_namespace.dawarich.metadata[0].name
# labels = {
# "app" = "photon"
# }
# }
# spec {
# selector = {
# app = "photon"
# }
# port {
# name = "http"
# port = 2322
# target_port = 2322
# protocol = "TCP"
# }
# }
# }
# Public ingress for Dawarich via the shared ingress_factory module.
module "ingress" {
  source          = "../../modules/kubernetes/ingress_factory"
  namespace       = kubernetes_namespace.dawarich.metadata[0].name
  name            = "dawarich"
  dns_type        = "proxied"
  tls_secret_name = var.tls_secret_name

  # auth = "none": this is a location tracking API. The OwnTracks bridge hook
  # posts to /api/v1/owntracks/points?api_key=... from outside the cluster, and
  # mobile location apps also POST programmatically with an api_key.
  # Forward-auth would 302 these clients into a login they can't complete.
  # Dawarich enforces the api_key at the app layer.
  auth = "none"

  # Homepage (gethomepage.dev) dashboard tile metadata.
  extra_annotations = {
    "gethomepage.dev/enabled"      = "true"
    "gethomepage.dev/group"        = "Smart Home"
    "gethomepage.dev/name"         = "Dawarich"
    "gethomepage.dev/description"  = "Location history"
    "gethomepage.dev/icon"         = "dawarich.png"
    "gethomepage.dev/pod-selector" = ""
  }
}
# Paired with DawarichIngestionStale alert in monitoring/prometheus_chart_values.tpl.
#
# Daily CronJob that measures how fresh Dawarich's location data is: it reads
# the newest points.created_at for user_id 1 from PostgreSQL and pushes it,
# together with the time of the push, to the Prometheus Pushgateway.
resource "kubernetes_cron_job_v1" "ingestion_freshness_monitor" {
metadata {
name = "ingestion-freshness-monitor"
namespace = kubernetes_namespace.dawarich.metadata[0].name
}
spec {
# Never start a run while the previous one is still active.
concurrency_policy = "Forbid"
failed_jobs_history_limit = 3
# Cron expression: minute 30, hour 6, every day.
schedule = "30 6 * * *"
starting_deadline_seconds = 300
successful_jobs_history_limit = 1
job_template {
metadata {}
spec {
backoff_limit = 2
# Finished Jobs are garbage-collected after one hour.
ttl_seconds_after_finished = 3600
template {
metadata {}
spec {
restart_policy = "OnFailure"
container {
name = "ingestion-freshness-monitor"
image = "docker.io/library/postgres:16-alpine"
# psql picks the password up from PGPASSWORD; it comes from the same Secret
# key the Dawarich containers use for DATABASE_PASSWORD.
env {
name = "PGPASSWORD"
value_from {
secret_key_ref {
name = "dawarich-secrets"
key = "db_password"
}
}
}
# Inline shell script. Only ${var.postgresql_host} is interpolated by
# Terraform; $TS, $NOW, $(...) and $((...)) reach the shell untouched.
# Steps:
#   1. best-effort install of curl (failure of `apk add` is ignored)
#   2. TS  = epoch seconds of MAX(created_at) in points for user_id = 1 (0 if none)
#   3. exit 1 when TS is empty or 0, so nothing is pushed in that case
#   4. log the age in whole hours
#   5. push two gauges to the Pushgateway under
#      job=dawarich-ingestion-freshness, user=viktor
# NOTE(review): everything between EOT markers is runtime text, including the
# nested METRICS heredoc — do not add comments or change whitespace inside it.
command = ["/bin/sh", "-c", <<-EOT
set -eu
apk add --no-cache curl >/dev/null 2>&1 || true
TS=$(PGPASSWORD=$PGPASSWORD psql -h ${var.postgresql_host} -U dawarich -d dawarich -t -A -c \
"SELECT COALESCE(EXTRACT(epoch FROM MAX(created_at))::bigint, 0) FROM points WHERE user_id = 1;")
NOW=$(date +%s)
if [ -z "$TS" ] || [ "$TS" = "0" ]; then
echo "ERROR: no points found for user_id=1"
exit 1
fi
AGE_H=$(( (NOW - TS) / 3600 ))
echo "last_point_ts=$TS now=$NOW age_hours=$AGE_H"
curl -sf --data-binary @- "http://prometheus-prometheus-pushgateway.monitoring:9091/metrics/job/dawarich-ingestion-freshness/user/viktor" <<METRICS
# TYPE dawarich_last_point_ingested_timestamp gauge
dawarich_last_point_ingested_timestamp $TS
# TYPE dawarich_ingestion_monitor_last_push_timestamp gauge
dawarich_ingestion_monitor_last_push_timestamp $NOW
METRICS
EOT
]
}
}
}
}
}
}
lifecycle {
ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config] # KYVERNO_LIFECYCLE_V1
}
}

View file

@ -0,0 +1,37 @@
# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa
terraform {
required_providers {
vault = {
source = "hashicorp/vault"
version = "~> 4.0"
}
cloudflare = {
source = "cloudflare/cloudflare"
version = "~> 4"
}
authentik = {
source = "goauthentik/authentik"
version = "~> 2024.10"
}
}
}
variable "kube_config_path" {
type = string
default = "~/.kube/config"
}
provider "kubernetes" {
config_path = var.kube_config_path
}
provider "helm" {
kubernetes = {
config_path = var.kube_config_path
}
}
provider "vault" {
address = "https://vault.viktorbarzin.me"
skip_child_token = true
}

Some files were not shown because too many files have changed in this diff Show more