6d224861 came from a --no-checkout worktree whose empty index made the
commit drop every file except two. This restores 05b50d2b's full tree and
correctly adds stacks/stem95su/gdrive-sync.tf + the service-catalog stem95su
entry. Forward-only (parent=6d224861, no force-push); [ci skip] since the
live infra was never applied from the broken commit.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
100 lines
4.1 KiB
HCL
100 lines
4.1 KiB
HCL
# =============================================================================
|
|
# Post-apply readiness gate
|
|
# =============================================================================
|
|
#
|
|
# Runs after all three Technitium deployments + the DNS LB service have been
# applied. Verifies that every deployment has finished rolling out, that each
# DNS pod answers an A query for a known record, and that all pods return the
# same answer for it. Fails the apply if any check fails. No canary — this is
# a hard gate.
|
|
#
|
|
# Override for emergency maintenance: apply with `-var skip_readiness=true`
|
|
# (set via terragrunt inputs when needed), or `terraform apply -target` the
|
|
# resources needed without touching this module.
|
|
|
|
# Escape hatch for the readiness gate: when set to true, the
# technitium_readiness_gate resource is created with count = 0, so none of
# its checks run during apply.
variable "skip_readiness" {
  description = "Skip the Technitium readiness gate. Use only for emergency maintenance."
  type        = bool
  default     = false
}
|
|
|
|
# Hard post-apply gate for the Technitium DNS deployments.
#
# Runs a local Bash script (via kubectl) that:
#   1. waits for rollout of the three Technitium deployments,
#   2. asks every `dns-server=true` pod for the A record of a known name,
#      retrying while the zone is still loading,
#   3. fails unless every pod returned the same set of addresses.
#
# Any non-zero exit from the script fails the apply. The resource is not
# created at all when var.skip_readiness is true.
resource "null_resource" "technitium_readiness_gate" {
  count = var.skip_readiness ? 0 : 1

  # Re-run when any deployment container spec or the CoreDNS Corefile changes.
  # `always = timestamp()` additionally forces a re-run on every apply so
  # transient drift still gets exercised.
  triggers = {
    primary_digest   = sha256(jsonencode(kubernetes_deployment.technitium.spec[0].template[0].spec[0].container[0]))
    secondary_digest = sha256(jsonencode(kubernetes_deployment.technitium_secondary.spec[0].template[0].spec[0].container[0]))
    tertiary_digest  = sha256(jsonencode(kubernetes_deployment.technitium_tertiary.spec[0].template[0].spec[0].container[0]))
    corefile         = sha256(kubernetes_config_map.coredns.data["Corefile"])
    always           = timestamp()
  }

  provisioner "local-exec" {
    # NOTE: inside this heredoc, shell variables are written as $VAR (never
    # with braces) so Terraform does not treat them as template interpolation.
    command = <<-BASH
      set -euo pipefail
      NS=technitium
      NAME=idrac.viktorbarzin.lan
      IP_RE='^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$'
      echo "=== Technitium readiness gate ==="

      # 1. Wait for rollout on all three deployments.
      for d in technitium technitium-secondary technitium-tertiary; do
        echo "-> rollout status deploy/$d"
        kubectl -n "$NS" rollout status "deploy/$d" --timeout=180s
      done

      # 2. Per-pod DNS check + content parity. Technitium pods have `dig` but
      #    no HTTP client, so we use DNS directly. Each pod must return an A
      #    record for $NAME, AND the answer must match across all instances.
      #    This catches:
      #      - Zone not loaded on an instance (NXDOMAIN / empty)
      #      - Zone drift between primary and replicas (different A record)
      #    The AXFR chain means all three should converge on the same value.
      PODS=$(kubectl -n "$NS" get pod -l dns-server=true -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}')
      if [ -z "$PODS" ]; then
        echo "ERROR: no dns-server=true pods found"
        exit 1
      fi

      # Zone load can take tens of seconds after a memory-bump rollout, so try
      # up to 6 times, sleeping 10s between attempts, before giving up.
      ANSWERS=""
      for POD in $PODS; do
        echo "-> dig @127.0.0.1 $NAME on $POD"
        ANSWER=""
        RAW=""
        for TRY in 1 2 3 4 5 6; do
          # stderr is kept in RAW purely for diagnostics; failures are
          # tolerated here because the retry loop decides success.
          RAW=$(kubectl -n "$NS" exec "$POD" -- dig +short +time=5 +tries=2 @127.0.0.1 "$NAME" A 2>&1 || true)
          # Reduce the output to a canonical single token: only IPv4 lines,
          # sorted, de-duplicated and comma-joined. This keeps kubectl/dig
          # noise and multi-address record sets from skewing the parity check.
          ANSWER=$(printf '%s\n' "$RAW" | grep -E "$IP_RE" | sort -u | paste -sd, - || true)
          if [ -n "$ANSWER" ]; then
            break
          fi
          if [ "$TRY" -lt 6 ]; then
            echo "   attempt $TRY: no A record yet, sleeping 10s"
            sleep 10
          else
            echo "   attempt $TRY: no A record"
          fi
        done
        if [ -z "$ANSWER" ]; then
          echo "ERROR: pod $POD never returned an A record for $NAME"
          echo "   last output: $RAW"
          exit 1
        fi
        echo "   $POD → $ANSWER"
        ANSWERS="$ANSWERS $ANSWER"
      done

      # 3. Content parity — every instance must agree on the A record set.
      #    Each pod contributed exactly one space-free token to ANSWERS.
      UNIQ=$(echo "$ANSWERS" | tr ' ' '\n' | grep -v '^$' | sort -u | wc -l)
      if [ "$UNIQ" -gt 1 ]; then
        echo "ERROR: instances returned different A records for $NAME: $ANSWERS"
        exit 1
      fi

      echo "=== Technitium readiness gate PASSED ==="
    BASH

    interpreter = ["/bin/bash", "-c"]
  }

  depends_on = [
    kubernetes_deployment.technitium,
    kubernetes_deployment.technitium_secondary,
    kubernetes_deployment.technitium_tertiary,
    kubernetes_service.technitium-dns,
    kubernetes_pod_disruption_budget_v1.technitium_dns,
  ]
}
|