From 9492874c43f64a66712448cecc437f17e0de5cf6 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Mon, 6 Apr 2026 13:00:49 +0300 Subject: [PATCH] fix: restore technitium MySQL query logging with Vault auto-rotation [ci skip] Query logs stopped syncing on 2026-03-16 due to password mismatch after MySQL cluster rebuild and Technitium app config reset. - Add Vault static role mysql-technitium (7-day rotation) - Add ExternalSecret for technitium-db-creds in technitium namespace - Add password-sync CronJob (6h) to push rotated password to Technitium API - Update Grafana datasource to use ESO-managed password - Remove stale technitium_db_password variable (replaced by ESO) - Update databases.md and restore-mysql.md runbook --- .claude/CLAUDE.md | 2 +- docs/architecture/databases.md | 12 ++- docs/runbooks/restore-mysql.md | 21 +++- stacks/technitium/main.tf | 17 ++- stacks/technitium/modules/technitium/main.tf | 105 ++++++++++++++++++- stacks/vault/main.tf | 11 +- 6 files changed, 149 insertions(+), 19 deletions(-) diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md index 19049acc..0498d524 100755 --- a/.claude/CLAUDE.md +++ b/.claude/CLAUDE.md @@ -54,7 +54,7 @@ Violations cause state drift, which causes future applies to break or silently r - **ESO (External Secrets Operator)**: `stacks/external-secrets/` — 43 ExternalSecrets + 9 DB-creds ExternalSecrets. API version `v1beta1`. Two ClusterSecretStores: `vault-kv` and `vault-database`. - **Plan-time pattern**: Former plan-time stacks use `data "kubernetes_secret"` to read ESO-created K8s Secrets at plan time (no Vault dependency). First-apply gotcha: must `terragrunt apply -target=kubernetes_manifest.external_secret` first, then full apply. `count` on resources using secret values fails — remove conditional counts. - **14 hybrid stacks** still keep `data "vault_kv_secret_v2"` for plan-time needs (job commands, Helm templatefile, module inputs). 
Platform has 48 plan-time refs — no migration possible without restructuring modules. -- **Database rotation**: Vault DB engine rotates passwords every 24h. MySQL: speedtest, wrongmove, codimd, nextcloud, shlink, grafana. PostgreSQL: trading, health, linkwarden, affine, woodpecker, claude_memory. Excluded: authentik (PgBouncer), technitium/crowdsec (Helm-baked), root users. +- **Database rotation**: Vault DB engine rotates static-role passwords (MySQL roles: every 7 days). MySQL: speedtest, wrongmove, codimd, nextcloud, shlink, grafana, technitium. PostgreSQL: trading, health, linkwarden, affine, woodpecker, claude_memory. Excluded: authentik (PgBouncer), crowdsec (Helm-baked), root users. Technitium uses a password-sync CronJob (every 6h) to push the rotated password to the Technitium app config via API. - **K8s credentials**: Vault K8s secrets engine. Roles: `dashboard-admin`, `ci-deployer`, `openclaw`, `local-admin`. Use `vault write kubernetes/creds/ROLE kubernetes_namespace=NS`. Helper: `scripts/vault-kubeconfig`. - **CI/CD (GHA + Woodpecker)**: Docker builds run on **GitHub Actions** (free on public repos). Woodpecker is **deploy-only** — receives image tag via API POST, runs `kubectl set image`. Woodpecker authenticates via K8s SA JWT → Vault K8s auth. Sync CronJob pushes `secret/ci/global` → Woodpecker API every 6h. Shell scripts in HCL heredocs: escape `$` → `$$`, `%{}` → `%%{}`. - **Platform cannot depend on vault** (circular). Apply order: vault first, then platform. Platform has 48 vault refs, all in module inputs — no ESO migration possible. 
diff --git a/docs/architecture/databases.md b/docs/architecture/databases.md index 2a2c8c4b..013c5ade 100644 --- a/docs/architecture/databases.md +++ b/docs/architecture/databases.md @@ -117,6 +117,7 @@ graph TB - nextcloud - shlink - grafana +- technitium (DNS query logs via QueryLogsMySqlApp plugin) ### Redis @@ -157,17 +158,18 @@ graph TB - nextcloud - shlink - grafana +- technitium (password synced to Technitium DNS app via CronJob every 6h) **Excluded from Rotation**: - authentik (uses PgBouncer, incompatible) -- technitium, crowdsec (Helm-baked credentials) +- crowdsec (Helm-baked credentials) - Root users (manual management) **How Rotation Works**: -1. Vault creates new user with same permissions -2. App fetches new credentials on next Vault lease renewal -3. Old credentials revoked after grace period -4. Zero-downtime rotation +1. Vault rotates the MySQL user's password (static role, 7-day period) +2. External Secrets Operator syncs the new password to a K8s Secret (15-min refresh) +3. Apps read from K8s Secret via `secret_key_ref` env vars +4. Special case: Technitium stores its MySQL connection in internal app config, so a CronJob pushes the rotated password to the Technitium API every 6 hours; until the next sync after a rotation, Technitium still holds the old password, so query logging can fail for up to 6 hours ## Configuration diff --git a/docs/runbooks/restore-mysql.md b/docs/runbooks/restore-mysql.md index 04d7992c..704a27ce 100644 --- a/docs/runbooks/restore-mysql.md +++ b/docs/runbooks/restore-mysql.md @@ -62,7 +62,26 @@ for db in speedtest wrongmove codimd nextcloud shlink grafana; do done ``` -### 5. InnoDB Cluster Recovery +### 5. Verify application MySQL users exist + +After any cluster rebuild or PVC recreation, the MySQL operator only recreates its own system users. Application users may be lost. 
+ ```bash +ROOT_PWD=$(kubectl get secret cluster-secret -n dbaas -o jsonpath='{.data.ROOT_PASSWORD}' | base64 -d) + +# Check all expected application users exist +kubectl exec -n dbaas mysql-cluster-0 -c mysql -- mysql -u root -p"$ROOT_PWD" \ + -e "SELECT user, host FROM mysql.user WHERE user IN ('nextcloud','forgejo','crowdsec','grafana','speedtest','wrongmove','codimd','shlink','technitium','uptimekuma');" + +# If Vault-managed users are missing, recreate the user and its grants, then force Vault to rotate the credentials: +# vault write -f database/rotate-role/mysql-<name> +# Rotation only resets the password of an existing user; it does not recreate a missing one. +# +# For technitium specifically, also run the password sync CronJob: +# kubectl create job --from=cronjob/technitium-password-sync technitium-pw-resync -n technitium +``` + +### 6. InnoDB Cluster Recovery If the InnoDB Cluster itself is broken (not just data loss): ```bash # Check cluster status via MySQL Shell diff --git a/stacks/technitium/main.tf b/stacks/technitium/main.tf index 751d1159..59dbaf3a 100644 --- a/stacks/technitium/main.tf +++ b/stacks/technitium/main.tf @@ -12,13 +12,12 @@ locals { } module "technitium" { - source = "./modules/technitium" - tls_secret_name = var.tls_secret_name - nfs_server = var.nfs_server - mysql_host = var.mysql_host - homepage_token = local.homepage_credentials["technitium"]["token"] - technitium_db_password = data.vault_kv_secret_v2.secrets.data["technitium_db_password"] - technitium_username = data.vault_kv_secret_v2.secrets.data["technitium_username"] - technitium_password = data.vault_kv_secret_v2.secrets.data["technitium_password"] - tier = local.tiers.core + source = "./modules/technitium" + tls_secret_name = var.tls_secret_name + nfs_server = var.nfs_server + mysql_host = var.mysql_host + homepage_token = local.homepage_credentials["technitium"]["token"] + technitium_username = data.vault_kv_secret_v2.secrets.data["technitium_username"] + technitium_password = data.vault_kv_secret_v2.secrets.data["technitium_password"] + tier = 
local.tiers.core } diff --git a/stacks/technitium/modules/technitium/main.tf b/stacks/technitium/modules/technitium/main.tf index afcb5856..caba2fef 100644 --- a/stacks/technitium/modules/technitium/main.tf +++ b/stacks/technitium/modules/technitium/main.tf @@ -1,7 +1,6 @@ variable "tls_secret_name" {} variable "tier" { type = string } variable "homepage_token" {} -variable "technitium_db_password" {} variable "nfs_server" { type = string } variable "mysql_host" { type = string } variable "technitium_username" { type = string } @@ -330,6 +329,44 @@ module "ingress-doh" { service_name = "technitium-web" } +# ExternalSecret for Technitium MySQL password (Vault auto-rotation) +resource "kubernetes_manifest" "external_secret" { + manifest = { + apiVersion = "external-secrets.io/v1beta1" + kind = "ExternalSecret" + metadata = { + name = "technitium-db-creds" + namespace = kubernetes_namespace.technitium.metadata[0].name + } + spec = { + refreshInterval = "15m" + secretStoreRef = { + name = "vault-database" + kind = "ClusterSecretStore" + } + target = { + name = "technitium-db-creds" + } + data = [{ + secretKey = "db_password" + remoteRef = { + key = "static-creds/mysql-technitium" + property = "password" + } + }] + } + } + depends_on = [kubernetes_namespace.technitium] +} + +data "kubernetes_secret" "technitium_db_creds" { + metadata { + name = "technitium-db-creds" + namespace = kubernetes_namespace.technitium.metadata[0].name + } + depends_on = [kubernetes_manifest.external_secret] +} + # Grafana datasource for Technitium DNS query logs in MySQL resource "kubernetes_config_map" "grafana_technitium_datasource" { metadata { @@ -351,7 +388,7 @@ resource "kubernetes_config_map" "grafana_technitium_datasource" { user = "technitium" uid = "technitium-mysql" secureJsonData = { - password = var.technitium_db_password + password = data.kubernetes_secret.technitium_db_creds.data["db_password"] } }] }) @@ -375,3 +412,67 @@ resource "kubernetes_config_map" 
"grafana_technitium_dashboard" { } } +# CronJob to sync Vault-rotated MySQL password into Technitium's app config +resource "kubernetes_cron_job_v1" "technitium_password_sync" { + metadata { + name = "technitium-password-sync" + namespace = kubernetes_namespace.technitium.metadata[0].name + } + spec { + schedule = "0 */6 * * *" + successful_jobs_history_limit = 1 + failed_jobs_history_limit = 3 + job_template { + metadata {} + spec { + template { + metadata {} + spec { + container { + name = "sync" + image = "curlimages/curl:latest" + resources { + requests = { + cpu = "10m" + memory = "32Mi" + } + limits = { + memory = "32Mi" + } + } + env { + name = "DB_PASSWORD" + value_from { + secret_key_ref { + name = "technitium-db-creds" + key = "db_password" + } + } + } + env { + name = "TECH_USER" + value = var.technitium_username + } + env { + name = "TECH_PASS" + value = var.technitium_password + } + command = ["/bin/sh", "-c", <<-EOT + set -e + TOKEN=$$(curl -sf "http://technitium-web:5380/api/user/login?user=$$TECH_USER&pass=$$TECH_PASS" | grep -o '"token":"[^"]*"' | cut -d'"' -f4) + if [ -z "$$TOKEN" ]; then echo "Login failed"; exit 1; fi + CONFIG="{\"enableLogging\":true,\"maxQueueSize\":1000000,\"maxLogDays\":0,\"maxLogRecords\":0,\"databaseName\":\"technitium\",\"connectionString\":\"Server=mysql.dbaas.svc.cluster.local; Port=3306; Uid=technitium; Pwd=$$DB_PASSWORD;\"}" + APP_NAME="Query Logs (MySQL)" + curl -sf -X POST "http://technitium-web:5380/api/apps/config/set?token=$$TOKEN" --data-urlencode "name=$$APP_NAME" --data-urlencode "config=$$CONFIG" + echo "Password sync complete" + EOT + ] + } + restart_policy = "OnFailure" + } + } + } + } + } +} + diff --git a/stacks/vault/main.tf b/stacks/vault/main.tf index 5818bc2c..c106dd51 100644 --- a/stacks/vault/main.tf +++ b/stacks/vault/main.tf @@ -465,7 +465,8 @@ resource "vault_database_secret_backend_connection" "mysql" { name = "mysql" allowed_roles = [ "mysql-speedtest", "mysql-wrongmove", "mysql-codimd", - 
"mysql-nextcloud", "mysql-shlink", "mysql-grafana" + "mysql-nextcloud", "mysql-shlink", "mysql-grafana", + "mysql-technitium" ] mysql { @@ -544,6 +545,14 @@ resource "vault_database_secret_backend_static_role" "mysql_grafana" { rotation_period = 604800 } +resource "vault_database_secret_backend_static_role" "mysql_technitium" { + backend = vault_mount.database.path + db_name = vault_database_secret_backend_connection.mysql.name + name = "mysql-technitium" + username = "technitium" + rotation_period = 604800 +} + # --- PostgreSQL Static Roles --- /*