From 50e8184d99e4a20724af9e8c40b4284a22c7d4ff Mon Sep 17 00:00:00 2001
From: Viktor Barzin
Date: Sat, 18 Apr 2026 12:04:17 +0000
Subject: [PATCH] [uptime-kuma] Codify MySQL monitor (id=663) via idempotent
 sync CronJob
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Context

Monitor id 663 "MySQL Standalone (dbaas)" was created manually yesterday via
the `uptime-kuma-api` Python library when the dbaas stack migrated from
InnoDB Cluster to standalone MySQL. It worked and was UP, but it lived only
in Uptime Kuma's MariaDB — if UK's DB were wiped or restored from an older
backup, the monitor would be lost.

## This change

Adds declarative, self-healing management for internal-service monitors
(databases, non-HTTP endpoints) that can't be discovered from ingress
annotations. Modelled on the existing `external-monitor-sync` CronJob.

- `local.internal_monitors` — list of desired monitors (name, type, connection
  string, Vault password key, interval, retries). Seeded with the MySQL
  Standalone monitor. Add new entries here to manage more; an illustrative
  example is at the end of this message.
- `kubernetes_secret.internal_monitor_sync` — pulls the admin password and all
  referenced DB passwords from Vault `secret/viktor` at apply time. Secret key
  names are derived from the monitor name (`DB_PASSWORD_<NAME>`, uppercased
  with non-alphanumerics replaced by `_`).
- `kubernetes_config_map_v1.internal_monitor_targets` — renders the target
  list to JSON for the sync container.
- `kubernetes_cron_job_v1.internal_monitor_sync` — runs every 10 min, looks up
  monitors by name, creates if missing, patches if drifted, and leaves id and
  history untouched when already in desired state.

## Why this approach (Option B, not a Terraform provider)

The `louislam/uptime-kuma` Terraform provider does NOT exist in the public
registry (verified — only a CLI tool of the same name). Option A from the task
brief was therefore unavailable. Option B (idempotent K8s CronJob) matches the
established pattern in the same module for `external-monitor-sync` — no new
machinery introduced.

## Monitor 663: no-op on first sync

Manual import was not possible (no provider → no state to import). The sync
job correctly identifies the existing monitor by name and reports:

    Monitor MySQL Standalone (dbaas) (id=663) already in desired state
    Internal monitor sync complete

DB heartbeats confirm monitor 663 stayed UP throughout, with `status=1` and
`Rows: 1` responses every 60s — no disruption.

## Vault key — left manual (by design)

`secret/viktor` is not Terraform-managed anywhere in the repo (only read via
`data "vault_kv_secret_v2"`). It is a user-edited Vault entry holding 135
keys. The `uptimekuma_db_password` key was added manually yesterday; this
change does NOT codify it. Codifying the whole `secret/viktor` entry is out of
scope for this task (it would need a separate migration + rotation story). The
sync job reads the existing value at apply time — so if the value is ever
rotated in Vault, the next sync picks it up.

## Plan + apply

    Plan: 3 to add, 0 to change, 0 to destroy.
    Apply complete! Resources: 3 added, 0 changed, 0 destroyed.

Re-plan: No changes. Your infrastructure matches the configuration.

Also updated `.claude/skills/uptime-kuma/SKILL.md` with the new pattern.
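## Example: adding another internal monitor (illustrative only)

The sketch below shows what a future entry in `local.internal_monitors` would
look like. It is not part of this change; the monitor name, replica host, and
Vault key are hypothetical, and the Vault key would have to exist under
`secret/viktor` before apply.

    {
      name                         = "MySQL Replica (dbaas)"
      type                         = "mysql"
      database_connection_string  = "mysql://uptimekuma@mysql-replica.dbaas.svc.cluster.local:3306"
      database_password_vault_key = "uptimekuma_replica_db_password"
      interval                     = 60
      retry_interval               = 60
      max_retries                  = 2
    },

On its next scheduled run the sync job would derive the secret key from the
name (`DB_PASSWORD_MYSQL_REPLICA__DBAAS_` for this hypothetical entry) and
create the monitor; no manual UI step is needed.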
Closes: code-ed2
---
 .claude/skills/uptime-kuma/SKILL.md         |  16 ++
 .../uptime-kuma/modules/uptime-kuma/main.tf | 190 ++++++++++++++++++
 2 files changed, 206 insertions(+)

diff --git a/.claude/skills/uptime-kuma/SKILL.md b/.claude/skills/uptime-kuma/SKILL.md
index 912912d6..dc982b4b 100644
--- a/.claude/skills/uptime-kuma/SKILL.md
+++ b/.claude/skills/uptime-kuma/SKILL.md
@@ -155,3 +155,19 @@ Common port is 80. Exceptions:
 3. Add `time.sleep(0.3)` between bulk operations to avoid overloading
 4. Homepage dashboard widget slug: `cluster-internal`
 5. Cloudflare-proxied at `uptime.viktorbarzin.me`
+
+## Terraform-Managed Monitors
+
+There is NO `louislam/uptime-kuma` Terraform provider. Two patterns exist for
+declarative monitor management in this stack:
+
+- **External HTTPS monitors** — auto-discovered from ingress annotations by the
+  `external-monitor-sync` CronJob (`*/10 * * * *`). Opt-out via
+  `uptime.viktorbarzin.me/external-monitor: "false"` on the ingress.
+- **Internal monitors (DBs, non-HTTP)** — declared in the
+  `local.internal_monitors` list in `stacks/uptime-kuma/modules/uptime-kuma/main.tf`
+  and synced by the `internal-monitor-sync` CronJob. To add one, append to the
+  list (provide `name`, `type`, `database_connection_string`,
+  `database_password_vault_key`, `interval`, `retry_interval`, `max_retries`)
+  and `scripts/tg apply`. The sync is idempotent — looks up by name, creates
+  if missing, patches if drifted. Existing monitors keep their id and history.
diff --git a/stacks/uptime-kuma/modules/uptime-kuma/main.tf b/stacks/uptime-kuma/modules/uptime-kuma/main.tf
index a3d2a55b..174ca0f0 100644
--- a/stacks/uptime-kuma/modules/uptime-kuma/main.tf
+++ b/stacks/uptime-kuma/modules/uptime-kuma/main.tf
@@ -520,3 +520,193 @@ PYEOF
     ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
   }
 }
+
+# =============================================================================
+# Internal Monitor Sync
+# Declaratively manages monitors for internal services (databases, non-HTTP
+# endpoints) that can't be discovered from ingress annotations. Idempotent:
+# looks up monitors by name, creates if missing, patches if drifted.
+#
+# Why a CronJob and not a one-shot Job:
+# - louislam/uptime-kuma has no Terraform provider (only a CLI tool).
+# - UK v2 stores monitors in MariaDB (`uptimekuma` on mysql.dbaas); if the DB
+#   is wiped/restored we must re-create them.
+# - CronJob self-heals drift (manual UI edits, UK restarts, DB restores).
+#
+# Managed monitors (name -> desired spec) are defined in local.internal_monitors
+# below. Add new internal-service monitors there.
+# =============================================================================
+
+locals {
+  internal_monitors = [
+    {
+      name                         = "MySQL Standalone (dbaas)"
+      type                         = "mysql"
+      database_connection_string  = "mysql://uptimekuma@mysql.dbaas.svc.cluster.local:3306"
+      database_password_vault_key = "uptimekuma_db_password"
+      interval                     = 60
+      retry_interval               = 60
+      max_retries                  = 2
+    },
+  ]
+}
+
+resource "kubernetes_secret" "internal_monitor_sync" {
+  metadata {
+    name      = "internal-monitor-sync"
+    namespace = kubernetes_namespace.uptime-kuma.metadata[0].name
+  }
+  data = merge(
+    { UPTIME_KUMA_PASSWORD = data.vault_kv_secret_v2.viktor.data["uptime_kuma_admin_password"] },
+    {
+      for m in local.internal_monitors :
+      "DB_PASSWORD_${upper(replace(m.name, "/[^A-Za-z0-9]/", "_"))}" =>
+      data.vault_kv_secret_v2.viktor.data[m.database_password_vault_key]
+    },
+  )
+}
+
+resource "kubernetes_config_map_v1" "internal_monitor_targets" {
+  metadata {
+    name      = "internal-monitor-targets"
+    namespace = kubernetes_namespace.uptime-kuma.metadata[0].name
+  }
+  data = {
+    "targets.json" = jsonencode([
+      for m in local.internal_monitors : {
+        name                        = m.name
+        type                        = m.type
+        database_connection_string = m.database_connection_string
+        password_env                = "DB_PASSWORD_${upper(replace(m.name, "/[^A-Za-z0-9]/", "_"))}"
+        interval                    = m.interval
+        retry_interval              = m.retry_interval
+        max_retries                 = m.max_retries
+      }
+    ])
+  }
+}
+
+resource "kubernetes_cron_job_v1" "internal_monitor_sync" {
+  metadata {
+    name      = "internal-monitor-sync"
+    namespace = kubernetes_namespace.uptime-kuma.metadata[0].name
+  }
+  spec {
+    concurrency_policy            = "Forbid"
+    failed_jobs_history_limit     = 3
+    successful_jobs_history_limit = 3
+    schedule                      = "*/10 * * * *"
+    job_template {
+      metadata {}
+      spec {
+        backoff_limit              = 1
+        ttl_seconds_after_finished = 300
+        template {
+          metadata {}
+          spec {
+            container {
+              name    = "sync"
+              image   = "docker.io/library/python:3.12-alpine"
+              command = ["/bin/sh", "-c", <<-EOT
+                pip install --quiet --disable-pip-version-check uptime-kuma-api
+                python3 << 'PYEOF'
+import json, os, time
+from uptime_kuma_api import UptimeKumaApi, MonitorType
+
+UPTIME_KUMA_URL = "http://uptime-kuma.uptime-kuma.svc.cluster.local"
+UPTIME_KUMA_PASS = os.environ["UPTIME_KUMA_PASSWORD"]
+
+with open("/config/targets.json") as f:
+    targets = json.load(f)
+
+api = UptimeKumaApi(UPTIME_KUMA_URL, timeout=120, wait_events=0.2)
+api.login("admin", UPTIME_KUMA_PASS)
+
+existing = {m["name"]: m for m in api.get_monitors()}
+
+for t in targets:
+    name = t["name"]
+    password = os.environ[t["password_env"]]
+    # MYSQL monitors use `databaseConnectionString` + `radiusPassword`
+    # (UK v2 re-uses the radiusPassword field for mysql auth — backwards compat).
+    desired = {
+        "type": MonitorType(t["type"]),
+        "name": name,
+        "databaseConnectionString": t["database_connection_string"],
+        "radiusPassword": password,
+        "interval": t["interval"],
+        "retryInterval": t["retry_interval"],
+        "maxretries": t["max_retries"],
+    }
+    if name not in existing:
+        print(f"Creating monitor: {name}")
+        api.add_monitor(**desired)
+        continue
+    m = existing[name]
+    drifted = (
+        m.get("databaseConnectionString") != desired["databaseConnectionString"]
+        or m.get("radiusPassword") != desired["radiusPassword"]
+        or m.get("interval") != desired["interval"]
+        or m.get("retryInterval") != desired["retryInterval"]
+        or m.get("maxretries") != desired["maxretries"]
+    )
+    if drifted:
+        print(f"Updating monitor {name} (id={m['id']})")
+        api.edit_monitor(
+            m["id"],
+            databaseConnectionString=desired["databaseConnectionString"],
+            radiusPassword=desired["radiusPassword"],
+            interval=desired["interval"],
+            retryInterval=desired["retryInterval"],
+            maxretries=desired["maxretries"],
+        )
+    else:
+        print(f"Monitor {name} (id={m['id']}) already in desired state")
+    time.sleep(0.3)
+
+api.disconnect()
+print("Internal monitor sync complete")
+PYEOF
+              EOT
+              ]
+              env_from {
+                secret_ref {
+                  name = kubernetes_secret.internal_monitor_sync.metadata[0].name
+                }
+              }
+              volume_mount {
+                name       = "config"
+                mount_path = "/config"
+                read_only  = true
+              }
+              resources {
+                requests = {
+                  memory = "128Mi"
+                  cpu    = "10m"
+                }
+                limits = {
+                  memory = "256Mi"
+                }
+              }
+            }
+            volume {
+              name = "config"
+              config_map {
+                name = kubernetes_config_map_v1.internal_monitor_targets.metadata[0].name
+              }
+            }
+            dns_config {
+              option {
+                name  = "ndots"
+                value = "2"
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  lifecycle {
+    ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
+  }
+}