[dns] DNS reliability & hardening — Technitium + CoreDNS + alerts + readiness gate
Workstreams A, B, G, H, I of the DNS reliability plan (code-q2e). Follow-ups for C, D, E, F filed as code-2k6, code-k0d, code-o6j, code-dw8. **Technitium (WS A)** - Primary deployment: add Kyverno lifecycle ignore_changes on dns_config (secondary/tertiary already had it) — eliminates per-apply ndots drift. - All 3 instances: raise memory request+limit from 512Mi to 1Gi (primary was restarting near the ceiling; CPU limits stay off per cluster policy). - zone-sync CronJob: parse API responses, push status/failures/last-run and per-instance zone_count gauges to Pushgateway, fail the job on any create error (was silently passing). **CoreDNS (WS B)** - Corefile: add policy sequential + health_check 5s + max_fails 2 on root forward, health_check on viktorbarzin.lan forward, serve_stale 3600s/86400s on both cache blocks — pfSense flap no longer takes the cluster down; upstream outage keeps cached names resolving for 24h. - Scale deploy/coredns to 3 replicas with required pod anti-affinity on hostname via null_resource (hashicorp/kubernetes v3 dropped the _patch resources); readiness gate asserts state post-apply. - PDB coredns with minAvailable=2. **Observability (WS G)** - Fix DNSQuerySpike — rewrite to compare against avg_over_time(dns_anomaly_total_queries[1h] offset 15m); previous dns_anomaly_avg_queries was computed from a per-pod /tmp file so always equalled the current value (alert could never fire). - New: DNSQueryRateDropped, TechnitiumZoneSyncFailed, TechnitiumZoneSyncStale, TechnitiumZoneCountMismatch, CoreDNSForwardFailureRate. **Post-apply readiness gate (WS H)** - null_resource.technitium_readiness_gate runs at end of apply: kubectl rollout status on all 3 deployments (180s), per-pod /api/stats/get probe, zone-count parity across the 3 instances. Fails the apply on any check fail. Override: -var skip_readiness=true. 
**Docs (WS I)** - docs/architecture/dns.md: CoreDNS Corefile hardening, new alerts table, zone-sync metrics reference, why DNSQuerySpike was broken. - docs/runbooks/technitium-apply.md (new): what the gate checks, failure modes, emergency override. Out of scope for this commit (see beads follow-ups): - WS C: NodeLocal DNSCache (code-2k6) - WS D: pfSense Unbound replaces dnsmasq (code-k0d) - WS E: Kea multi-IP DHCP + TSIG (code-o6j) - WS F: static-client DNS fixes (code-dw8) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
a5e097088a
commit
9a21c0f065
7 changed files with 390 additions and 50 deletions
69
stacks/technitium/modules/technitium/coredns.tf
Normal file
69
stacks/technitium/modules/technitium/coredns.tf
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
# =============================================================================
|
||||
# CoreDNS — Scaling, Anti-Affinity, PDB
|
||||
# =============================================================================
|
||||
#
|
||||
# CoreDNS is kube-system / kubeadm-managed. We only patch replicas + affinity
|
||||
# here (the Corefile ConfigMap is in main.tf). The hashicorp/kubernetes v3
|
||||
# provider removed the *_patch resource family from v2, so we apply the
|
||||
# desired state via `kubectl patch` inside a null_resource. The patch is
|
||||
# idempotent — a no-op when the deployment already matches.
|
||||
#
|
||||
# Kubeadm upgrades preserve the replica count on the existing deployment but
|
||||
# reset the pod template (including affinity) from the ClusterConfiguration.
|
||||
# Re-running `terraform apply` re-asserts the affinity patch; the readiness
|
||||
# gate in `readiness.tf` catches regressions if the patch is reverted.
|
||||
|
||||
# Imperatively asserts the desired CoreDNS deployment shape via kubectl:
# 3 replicas and *required* pod anti-affinity on kubernetes.io/hostname.
#
# Re-runs whenever this file changes (spec_hash) or the replica trigger
# changes. The provisioner fails the apply if any kubectl step fails.
resource "null_resource" "coredns_scale_and_affinity" {
  triggers = {
    replicas  = 3
    spec_hash = sha256(file("${path.module}/coredns.tf"))
  }

  provisioner "local-exec" {
    command     = <<-BASH
      set -euo pipefail

      # 1. Scale to 3 replicas.
      kubectl -n kube-system scale deploy/coredns --replicas=3

      # 2. Switch anti-affinity from preferred -> required on hostname.
      #
      #    A JSON merge patch (RFC 7386) is used instead of a JSON-Patch
      #    "replace": it creates affinity/podAntiAffinity when the path is
      #    missing, the explicit null removes any preferred rule, and the
      #    array is replaced wholesale. It is a no-op when the deployment
      #    already matches, so the failure no longer has to be swallowed
      #    with "|| true" — a patch that does not apply now fails the run.
      kubectl -n kube-system patch deploy/coredns --type=merge -p='{
        "spec": {
          "template": {
            "spec": {
              "affinity": {
                "podAntiAffinity": {
                  "preferredDuringSchedulingIgnoredDuringExecution": null,
                  "requiredDuringSchedulingIgnoredDuringExecution": [
                    {
                      "labelSelector": {
                        "matchExpressions": [
                          {"key": "k8s-app", "operator": "In", "values": ["kube-dns"]}
                        ]
                      },
                      "topologyKey": "kubernetes.io/hostname"
                    }
                  ]
                }
              }
            }
          }
        }
      }'

      # 3. Wait for rollout to settle.
      kubectl -n kube-system rollout status deploy/coredns --timeout=120s
    BASH
    interpreter = ["/bin/bash", "-c"]
  }
}
|
||||
|
||||
# PodDisruptionBudget for CoreDNS: voluntary disruptions may proceed only
# while at least 2 pods labelled k8s-app=kube-dns remain available.
resource "kubernetes_pod_disruption_budget_v1" "coredns" {
  metadata {
    namespace = "kube-system"
    name      = "coredns"
  }

  spec {
    # Targets the pods carrying the k8s-app=kube-dns label.
    selector {
      match_labels = { "k8s-app" = "kube-dns" }
    }

    min_available = "2"
  }
}
|
||||
|
|
@ -115,11 +115,11 @@ resource "kubernetes_deployment" "technitium_secondary" {
|
|||
}
|
||||
resources {
|
||||
requests = {
|
||||
cpu = "25m"
|
||||
memory = "512Mi"
|
||||
cpu = "100m"
|
||||
memory = "1Gi"
|
||||
}
|
||||
limits = {
|
||||
memory = "512Mi"
|
||||
memory = "1Gi"
|
||||
}
|
||||
}
|
||||
port {
|
||||
|
|
@ -270,11 +270,11 @@ resource "kubernetes_deployment" "technitium_tertiary" {
|
|||
}
|
||||
resources {
|
||||
requests = {
|
||||
cpu = "25m"
|
||||
memory = "512Mi"
|
||||
cpu = "100m"
|
||||
memory = "1Gi"
|
||||
}
|
||||
limits = {
|
||||
memory = "512Mi"
|
||||
memory = "1Gi"
|
||||
}
|
||||
}
|
||||
port {
|
||||
|
|
@ -391,44 +391,90 @@ resource "kubernetes_cron_job_v1" "technitium_zone_sync" {
|
|||
set -e
|
||||
PRIMARY="http://technitium-primary.technitium.svc.cluster.local:5380"
|
||||
REPLICAS="http://technitium-secondary-web.technitium.svc.cluster.local:5380 http://technitium-tertiary-web.technitium.svc.cluster.local:5380"
|
||||
PUSHGW="http://prometheus-prometheus-pushgateway.monitoring:9091/metrics/job/technitium-zone-sync"
|
||||
|
||||
# Track overall status — non-zero if any zone fails to create
|
||||
OVERALL_STATUS=0
|
||||
FAIL_COUNT=0
|
||||
SYNCED=0
|
||||
|
||||
# Login to primary
|
||||
P_TOKEN=$(curl -sf "$PRIMARY/api/user/login?user=$TECH_USER&pass=$TECH_PASS" | sed -n 's/.*"token":"\([^"]*\)".*/\1/p')
|
||||
if [ -z "$P_TOKEN" ]; then echo "ERROR: Cannot login to primary"; exit 1; fi
|
||||
if [ -z "$P_TOKEN" ]; then echo "ERROR: Cannot login to primary"; OVERALL_STATUS=1; fi
|
||||
|
||||
# Get zones from primary (excluding default zones that don't need replication)
|
||||
curl -sf "$PRIMARY/api/zones/list?token=$P_TOKEN" | tr ',' '\n' | sed -n 's/.*"name":"\([^"]*\)".*/\1/p' | \
|
||||
grep -v -E '^(localhost|0\.in-addr\.arpa|127\.in-addr\.arpa|255\.in-addr\.arpa|1\.0\.0.*ip6\.arpa)$$' > /tmp/primary_zones.txt
|
||||
echo "Primary has $(wc -l < /tmp/primary_zones.txt) zones to replicate"
|
||||
|
||||
# Enable zone transfers on primary for all zones
|
||||
while read -r zone; do
|
||||
curl -sf "$PRIMARY/api/zones/options/set?token=$P_TOKEN&zone=$zone&zoneTransfer=Allow" > /dev/null || true
|
||||
done < /tmp/primary_zones.txt
|
||||
|
||||
# Sync to each replica
|
||||
SYNCED=0
|
||||
for REPLICA in $REPLICAS; do
|
||||
R_TOKEN=$(curl -sf "$REPLICA/api/user/login?user=$TECH_USER&pass=$TECH_PASS" | sed -n 's/.*"token":"\([^"]*\)".*/\1/p')
|
||||
if [ -z "$R_TOKEN" ]; then echo "WARN: Cannot login to $REPLICA, skipping"; continue; fi
|
||||
|
||||
# Get existing zones on this replica
|
||||
curl -sf "$REPLICA/api/zones/list?token=$R_TOKEN" | tr ',' '\n' | sed -n 's/.*"name":"\([^"]*\)".*/\1/p' > /tmp/replica_zones.txt
|
||||
if [ "$OVERALL_STATUS" -eq 0 ]; then
|
||||
# Get zones from primary (excluding default zones that don't need replication)
|
||||
curl -sf "$PRIMARY/api/zones/list?token=$P_TOKEN" | tr ',' '\n' | sed -n 's/.*"name":"\([^"]*\)".*/\1/p' | \
|
||||
grep -v -E '^(localhost|0\.in-addr\.arpa|127\.in-addr\.arpa|255\.in-addr\.arpa|1\.0\.0.*ip6\.arpa)$$' > /tmp/primary_zones.txt
|
||||
PRIMARY_COUNT=$(wc -l < /tmp/primary_zones.txt)
|
||||
echo "Primary has $PRIMARY_COUNT zones to replicate"
|
||||
|
||||
# Enable zone transfers on primary for all zones
|
||||
while read -r zone; do
|
||||
if grep -qx "$zone" /tmp/replica_zones.txt; then
|
||||
# Zone exists — just resync
|
||||
curl -sf "$REPLICA/api/zones/resync?token=$R_TOKEN&zone=$zone" > /dev/null || true
|
||||
else
|
||||
# New zone — create as Secondary and sync
|
||||
echo "NEW: Creating $zone on $REPLICA"
|
||||
curl -sf "$REPLICA/api/zones/create?token=$R_TOKEN&zone=$zone&type=Secondary&primaryNameServerAddresses=$PRIMARY_IP" > /dev/null || true
|
||||
SYNCED=$((SYNCED + 1))
|
||||
fi
|
||||
curl -sf "$PRIMARY/api/zones/options/set?token=$P_TOKEN&zone=$zone&zoneTransfer=Allow" > /dev/null || true
|
||||
done < /tmp/primary_zones.txt
|
||||
done
|
||||
|
||||
echo "Zone sync complete. $$SYNCED new zone(s) created."
|
||||
# Sync to each replica
|
||||
for REPLICA in $REPLICAS; do
|
||||
R_NAME=$(echo "$REPLICA" | sed 's|http://||; s|-web.*||')
|
||||
R_TOKEN=$(curl -sf "$REPLICA/api/user/login?user=$TECH_USER&pass=$TECH_PASS" | sed -n 's/.*"token":"\([^"]*\)".*/\1/p')
|
||||
if [ -z "$R_TOKEN" ]; then
|
||||
echo "ERROR: Cannot login to $REPLICA"
|
||||
OVERALL_STATUS=1
|
||||
FAIL_COUNT=$((FAIL_COUNT + 1))
|
||||
# Push replica zone_count=0 so divergence alert fires
|
||||
printf 'technitium_zone_count{instance="%s"} 0\n' "$R_NAME" | \
|
||||
curl -sf --data-binary @- "$PUSHGW/instance/$R_NAME" || true
|
||||
continue
|
||||
fi
|
||||
|
||||
# Get existing zones on this replica
|
||||
curl -sf "$REPLICA/api/zones/list?token=$R_TOKEN" | tr ',' '\n' | sed -n 's/.*"name":"\([^"]*\)".*/\1/p' > /tmp/replica_zones.txt
|
||||
REPLICA_COUNT=$(wc -l < /tmp/replica_zones.txt)
|
||||
|
||||
while read -r zone; do
|
||||
if grep -qx "$zone" /tmp/replica_zones.txt; then
|
||||
# Zone exists — just resync
|
||||
curl -sf "$REPLICA/api/zones/resync?token=$R_TOKEN&zone=$zone" > /dev/null || true
|
||||
else
|
||||
# New zone — create as Secondary and validate response
|
||||
echo "NEW: Creating $zone on $REPLICA"
|
||||
RESP=$(curl -sf "$REPLICA/api/zones/create?token=$R_TOKEN&zone=$zone&type=Secondary&primaryNameServerAddresses=$PRIMARY_IP" || echo '{"status":"error"}')
|
||||
if echo "$RESP" | grep -q '"status":"ok"'; then
|
||||
SYNCED=$((SYNCED + 1))
|
||||
else
|
||||
echo "ERROR: Failed to create $zone on $REPLICA: $RESP"
|
||||
OVERALL_STATUS=1
|
||||
FAIL_COUNT=$((FAIL_COUNT + 1))
|
||||
fi
|
||||
fi
|
||||
done < /tmp/primary_zones.txt
|
||||
|
||||
# Push per-replica zone count
|
||||
printf 'technitium_zone_count{instance="%s"} %s\n' "$R_NAME" "$REPLICA_COUNT" | \
|
||||
curl -sf --data-binary @- "$PUSHGW/instance/$R_NAME" || true
|
||||
done
|
||||
|
||||
# Push primary zone count
|
||||
printf 'technitium_zone_count{instance="primary"} %s\n' "$PRIMARY_COUNT" | \
|
||||
curl -sf --data-binary @- "$PUSHGW/instance/primary" || true
|
||||
fi
|
||||
|
||||
# Push overall status (0=ok, 1=fail) + last-run timestamp
|
||||
cat <<METRICS | curl -sf --data-binary @- "$PUSHGW" || true
|
||||
# HELP technitium_zone_sync_status Zone sync job status (0=ok, 1=fail)
|
||||
# TYPE technitium_zone_sync_status gauge
|
||||
technitium_zone_sync_status $OVERALL_STATUS
|
||||
# HELP technitium_zone_sync_failures Zones that failed to create this run
|
||||
# TYPE technitium_zone_sync_failures gauge
|
||||
technitium_zone_sync_failures $FAIL_COUNT
|
||||
# HELP technitium_zone_sync_last_run Timestamp of last zone-sync run
|
||||
# TYPE technitium_zone_sync_last_run gauge
|
||||
technitium_zone_sync_last_run $(date +%s)
|
||||
METRICS
|
||||
|
||||
echo "Zone sync complete. $SYNCED new zone(s) created. $FAIL_COUNT failures. status=$OVERALL_STATUS"
|
||||
exit $OVERALL_STATUS
|
||||
SCRIPT
|
||||
]
|
||||
env {
|
||||
|
|
|
|||
|
|
@ -60,10 +60,15 @@ resource "kubernetes_config_map" "coredns" {
|
|||
ttl 30
|
||||
}
|
||||
prometheus :9153
|
||||
forward . 10.0.20.1 8.8.8.8 1.1.1.1
|
||||
forward . 10.0.20.1 8.8.8.8 1.1.1.1 {
|
||||
policy sequential
|
||||
health_check 5s
|
||||
max_fails 2
|
||||
}
|
||||
cache {
|
||||
success 10000 300 6
|
||||
denial 10000 300 60
|
||||
serve_stale 3600s 86400s
|
||||
}
|
||||
loop
|
||||
reload
|
||||
|
|
@ -77,10 +82,14 @@ resource "kubernetes_config_map" "coredns" {
|
|||
rcode NXDOMAIN
|
||||
fallthrough
|
||||
}
|
||||
forward . 10.96.0.53 # Technitium ClusterIP (technitium-dns-internal)
|
||||
forward . 10.96.0.53 {
|
||||
health_check 5s
|
||||
max_fails 2
|
||||
}
|
||||
cache {
|
||||
success 10000 300 6
|
||||
denial 10000 300 60
|
||||
serve_stale 3600s 86400s
|
||||
}
|
||||
}
|
||||
EOF
|
||||
|
|
@ -161,11 +170,11 @@ resource "kubernetes_deployment" "technitium" {
|
|||
name = "technitium"
|
||||
resources {
|
||||
requests = {
|
||||
cpu = "25m"
|
||||
memory = "512Mi"
|
||||
cpu = "100m"
|
||||
memory = "1Gi"
|
||||
}
|
||||
limits = {
|
||||
memory = "512Mi"
|
||||
memory = "1Gi"
|
||||
}
|
||||
}
|
||||
port {
|
||||
|
|
@ -221,6 +230,10 @@ resource "kubernetes_deployment" "technitium" {
|
|||
}
|
||||
}
|
||||
}
|
||||
lifecycle {
|
||||
# KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
|
||||
ignore_changes = [spec[0].template[0].spec[0].dns_config]
|
||||
}
|
||||
}
|
||||
|
||||
resource "kubernetes_service" "technitium-web" {
|
||||
|
|
|
|||
88
stacks/technitium/modules/technitium/readiness.tf
Normal file
88
stacks/technitium/modules/technitium/readiness.tf
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
# =============================================================================
|
||||
# Post-apply readiness gate
|
||||
# =============================================================================
|
||||
#
|
||||
# Runs after all three Technitium deployments + the DNS LB service have been
|
||||
# applied. Verifies that every instance is rolled out, the API responds on
|
||||
# every pod, and zone counts agree. Fails the apply if any
|
||||
# check fails. No canary — this is a hard gate.
|
||||
#
|
||||
# Override for emergency maintenance: apply with `-var skip_readiness=true`
|
||||
# (set via terragrunt inputs when needed), or `terraform apply -target` the
|
||||
# resources needed without touching this module.
|
||||
|
||||
# Emergency escape hatch: when true, the readiness gate resource is not
# created (its count evaluates to 0), so none of its checks run.
variable "skip_readiness" {
  description = "Skip the Technitium readiness gate. Use only for emergency maintenance."
  type        = bool
  default     = false
}
|
||||
|
||||
# Hard post-apply gate for the Technitium stack. Not created when
# var.skip_readiness is true. The local-exec script exits non-zero (failing
# the apply) when:
#   1. any of the three deployments does not finish rolling out in 180s,
#   2. any dns-server=true pod does not return "status":"ok" from its local
#      /api/stats/get endpoint,
#   3. the zone list cannot be fetched from any of the three web services,
#      or the per-service zone counts differ.
resource "null_resource" "technitium_readiness_gate" {
  count = var.skip_readiness ? 0 : 1

  # Re-run when any deployment container spec or the Corefile changes, and
  # on every apply (timestamp) so transient drift still gets exercised.
  triggers = {
    primary_digest   = sha256(jsonencode(kubernetes_deployment.technitium.spec[0].template[0].spec[0].container[0]))
    secondary_digest = sha256(jsonencode(kubernetes_deployment.technitium_secondary.spec[0].template[0].spec[0].container[0]))
    tertiary_digest  = sha256(jsonencode(kubernetes_deployment.technitium_tertiary.spec[0].template[0].spec[0].container[0]))
    corefile         = sha256(kubernetes_config_map.coredns.data["Corefile"])
    always           = timestamp()
  }

  provisioner "local-exec" {
    command     = <<-BASH
      set -euo pipefail
      NS=technitium
      echo "=== Technitium readiness gate ==="

      # 1. Wait for rollout on all three deployments.
      for d in technitium technitium-secondary technitium-tertiary; do
        echo "-> rollout status deploy/$d"
        kubectl -n "$NS" rollout status "deploy/$d" --timeout=180s
      done

      # 2. Per-pod API check (via kubectl exec on the pod itself — no
      #    ephemeral debug pods, no image pull, no zombies).
      PODS=$(kubectl -n "$NS" get pod -l dns-server=true -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}')
      if [ -z "$PODS" ]; then
        echo "ERROR: no dns-server=true pods found"
        exit 1
      fi

      for POD in $PODS; do
        echo "-> API check on $POD"
        # Capture the response first instead of piping into `grep -q`:
        # under pipefail an early grep exit can SIGPIPE kubectl and turn a
        # healthy response into a reported failure.
        RESP=$(kubectl -n "$NS" exec "$POD" -- wget -qO- --timeout=10 "http://127.0.0.1:5380/api/stats/get?token=&type=LastHour") || RESP=""
        if ! grep -q '"status":"ok"' <<<"$RESP"; then
          echo "ERROR: API check failed on $POD"
          exit 1
        fi
      done

      # 3. Zone-count parity — query the three web services from within a
      #    running technitium pod (has wget) to avoid spawning probe pods.
      #    A failed fetch is an error in its own right: previously every
      #    failed fetch counted as 0, so three unreachable services looked
      #    like "all counts agree" and the gate passed.
      # NOTE(review): the token parameter is empty — confirm the API allows
      # unauthenticated zone listing; otherwise every count is 0 and the
      # parity comparison is vacuous.
      FIRST_POD=$(echo "$PODS" | head -1)
      COUNTS=""
      for SVC in technitium-web technitium-secondary-web technitium-tertiary-web; do
        if ! RESP=$(kubectl -n "$NS" exec "$FIRST_POD" -- wget -qO- --timeout=10 "http://$SVC:5380/api/zones/list?token="); then
          echo "ERROR: could not fetch zone list from $SVC"
          exit 1
        fi
        # grep -c exits 1 on zero matches; `|| true` keeps the printed 0.
        COUNT=$(printf '%s' "$RESP" | tr ',' '\n' | grep -c '"name":' || true)
        echo "-> $SVC zone count: $COUNT"
        COUNTS="$COUNTS $COUNT"
      done
      # Intentionally unquoted: word-splitting collapses the leading space.
      UNIQ=$(echo $COUNTS | tr ' ' '\n' | sort -u | wc -l)
      if [ "$UNIQ" -gt 1 ]; then
        echo "ERROR: zone counts differ across instances:$COUNTS"
        exit 1
      fi

      echo "=== Technitium readiness gate PASSED ==="
    BASH
    interpreter = ["/bin/bash", "-c"]
  }

  depends_on = [
    kubernetes_deployment.technitium,
    kubernetes_deployment.technitium_secondary,
    kubernetes_deployment.technitium_tertiary,
    kubernetes_service.technitium-dns,
    kubernetes_pod_disruption_budget_v1.technitium_dns,
  ]
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue