From f1f723be83fdcaa174627ebec4ea16890076c8f4 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Wed, 22 Apr 2026 17:47:18 +0000 Subject: [PATCH] [technitium] zone-sync now reconciles primaryNameServerAddresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a zone is created against a stale primary IP (e.g. the old primary pod IP 10.10.36.189 before the technitium-primary ClusterIP service existed), AXFR refresh keeps failing forever while every other zone on the same replica refreshes fine from 10.110.37.186. The resync-only branch didn't touch zone options, so the bad IP was pinned indefinitely. This surfaced as rpi-sofia.viktorbarzin.lan returning 192.168.1.16 (pre-move) on secondaries while primary had the correct .10 from 2026-04-22 morning — Uptime Kuma Sofia RPI monitor DOWN, and cluster_healthcheck FAIL. The sync loop now re-applies primaryNameServerAddresses on every run for existing zones. Idempotent — Technitium accepts identical values — and self-heals any drift within 30 min. Env renamed PRIMARY_IP → PRIMARY_HOST for consistency with the reconcile semantics. Hostname form (technitium-primary.technitium.svc.cluster.local) was tried but Technitium's own resolver doesn't forward svc.cluster.local, so the field must stay a literal IP. Terraform tracks the ClusterIP on every apply and the reconcile loop propagates it to replicas. 
--- stacks/technitium/modules/technitium/ha.tf | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/stacks/technitium/modules/technitium/ha.tf b/stacks/technitium/modules/technitium/ha.tf index bd90cbfa..71097afe 100644 --- a/stacks/technitium/modules/technitium/ha.tf +++ b/stacks/technitium/modules/technitium/ha.tf @@ -434,12 +434,17 @@ resource "kubernetes_cron_job_v1" "technitium_zone_sync" { while read -r zone; do if grep -qx "$zone" /tmp/replica_zones.txt; then - # Zone exists — just resync + # Zone exists — reconcile primaryNameServerAddresses to the + # primary service ClusterIP before resync. Without this, a zone created + # against an old pod IP (pre-service-ClusterIP era) stays + # pinned to that dead IP forever and zone transfers fail + # silently. Idempotent — Technitium accepts identical values. + curl -sf "$REPLICA/api/zones/options/set?token=$R_TOKEN&zone=$zone&primaryNameServerAddresses=$PRIMARY_HOST" > /dev/null || true curl -sf "$REPLICA/api/zones/resync?token=$R_TOKEN&zone=$zone" > /dev/null || true else # New zone — create as Secondary and validate response echo "NEW: Creating $zone on $REPLICA" - RESP=$(curl -sf "$REPLICA/api/zones/create?token=$R_TOKEN&zone=$zone&type=Secondary&primaryNameServerAddresses=$PRIMARY_IP" || echo '{"status":"error"}') + RESP=$(curl -sf "$REPLICA/api/zones/create?token=$R_TOKEN&zone=$zone&type=Secondary&primaryNameServerAddresses=$PRIMARY_HOST" || echo '{"status":"error"}') if echo "$RESP" | grep -q '"status":"ok"'; then SYNCED=$((SYNCED + 1)) else @@ -486,7 +491,14 @@ resource "kubernetes_cron_job_v1" "technitium_zone_sync" { value = var.technitium_password } env { - name = "PRIMARY_IP" + # Service ClusterIP — Terraform tracks it on every apply, and the + # reconcile loop above re-applies it to every existing zone on + # every run (*/30m), so any drift (e.g. service recreate → new + # ClusterIP, or historical pod-IP values still pinned on replicas) + # self-heals within a sync cycle. 
Hostname form was tried but + # Technitium's own resolver doesn't forward svc.cluster.local, + # so `primaryNameServerAddresses` must be a literal IP. + name = "PRIMARY_HOST" value = kubernetes_service.technitium_primary.spec[0].cluster_ip } }