technitium: mirror mail-auth records into internal zone; fix redfish check [ci skip]
Two fixes from the post-DNS-internalization health sweep:
1. The internal viktorbarzin.me zone served only ingress A/CNAME records.
Since the mailserver pods now resolve the domain through it (CoreDNS
viktorbarzin.me:53 -> Technitium, 59a531b8), rspamd's SPF checks on
inbound @viktorbarzin.me mail saw SPF=none and quarantined it — the
Brevo email-roundtrip probe failed from the 16:20 run onward
(EmailRoundtripFailing/Stale). The ingress-dns-sync CronJob now also
maintains the static mail-auth records (SPF, brevo-code TXT, MX;
DMARC + DKIM were already present), idempotently. Principle: the
internal zone must be a SUPERSET of the public zone for every record
type internal clients consume. Verified in-pod: all four types
resolve; roundtrip re-probe green.
2. cluster_healthcheck #30 queried instant `up`, which goes stale for
~5 of every 10 minutes on the deliberate 10m redfish-idrac remnant
job -> intermittent false "redfish-idrac=missing". Now uses
last_over_time(up[15m]) — same answers for fast jobs.
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
parent
e7fbf986fb
commit
00bc1e052d
2 changed files with 37 additions and 2 deletions
|
|
@ -2026,11 +2026,16 @@ check_hardware_exporters() {
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
# Check Prometheus scrape targets for hardware exporters
|
# Check Prometheus scrape targets for hardware exporters.
|
||||||
|
# last_over_time(up[15m]) instead of instant `up`: the redfish-idrac
|
||||||
|
# remnant scrapes every 10m (> the 5m staleness window), so an instant
|
||||||
|
# query returns it EMPTY ~half the time -> intermittent false "missing"
|
||||||
|
# (observed 2026-06-10). 15m covers the slowest job; identical answers
|
||||||
|
# for the 1-2m jobs.
|
||||||
local prom_jobs=("snmp-idrac" "snmp-ups" "redfish-idrac" "proxmox-host")
|
local prom_jobs=("snmp-idrac" "snmp-ups" "redfish-idrac" "proxmox-host")
|
||||||
local up_result
|
local up_result
|
||||||
up_result=$($KUBECTL exec -n monitoring deploy/prometheus-server -- \
|
up_result=$($KUBECTL exec -n monitoring deploy/prometheus-server -- \
|
||||||
wget -q -O- 'http://localhost:9090/api/v1/query?query=up' 2>/dev/null || true)
|
wget -q -O- 'http://localhost:9090/api/v1/query?query=last_over_time(up%5B15m%5D)' 2>/dev/null || true)
|
||||||
|
|
||||||
if [[ -n "$up_result" ]]; then
|
if [[ -n "$up_result" ]]; then
|
||||||
for job in "${prom_jobs[@]}"; do
|
for job in "${prom_jobs[@]}"; do
|
||||||
|
|
|
||||||
|
|
@ -957,6 +957,36 @@ resource "kubernetes_cron_job_v1" "technitium_ingress_dns_sync" {
|
||||||
done
|
done
|
||||||
echo "Sync complete. Created $$CREATED new records."
|
echo "Sync complete. Created $$CREATED new records."
|
||||||
|
|
||||||
|
# Static mail-auth records (SPF / brevo verification / MX /
|
||||||
|
# DMARC / DKIM) mirrored from the PUBLIC Cloudflare zone.
|
||||||
|
# The internal zone is authoritative for viktorbarzin.me, and
|
||||||
|
# since 2026-06-10 the MAILSERVER pods resolve the domain
|
||||||
|
# through it (CoreDNS viktorbarzin.me:53 -> Technitium).
|
||||||
|
# Without these, rspamd's SPF/DKIM/DMARC checks on inbound
|
||||||
|
# @viktorbarzin.me mail (e.g. the Brevo email-roundtrip probe)
|
||||||
|
# see SPF=none/DKIM=fail and quarantine it (EmailRoundtrip*
|
||||||
|
# alerts, 2026-06-10). Internal zone must be a SUPERSET of the
|
||||||
|
# public one for every record type clients consume. Idempotent:
|
||||||
|
# checked against the zone dump before adding. If these change
|
||||||
|
# in Cloudflare, update here too (slow-moving).
|
||||||
|
ZONE_DUMP=$$(curl -sf "$$TECH_API/api/zones/records/get?token=$$TOKEN&zone=$$ZONE&domain=$$ZONE&listZone=true")
|
||||||
|
add_txt() {
|
||||||
|
NAME="$$1"; MARK="$$2"; VALUE="$$3"
|
||||||
|
if echo "$$ZONE_DUMP" | grep -q "$$MARK"; then echo "mail-auth: $$NAME ($$MARK) present"; return; fi
|
||||||
|
R=$$(curl -sf -G "$$TECH_API/api/zones/records/add" --data-urlencode "token=$$TOKEN" --data-urlencode "zone=$$ZONE" --data-urlencode "domain=$$NAME" --data-urlencode "type=TXT" --data-urlencode "text=$$VALUE" --data-urlencode "ttl=3600") || true
|
||||||
|
echo "$$R" | grep -q '"status":"ok"' && echo "mail-auth: added TXT $$NAME ($$MARK)" || echo "mail-auth: FAILED TXT $$NAME -- $$R"
|
||||||
|
}
|
||||||
|
add_txt "$$ZONE" "v=spf1" "v=spf1 include:spf.brevo.com ~all"
|
||||||
|
add_txt "$$ZONE" "brevo-code" "brevo-code:a6ef1dd91b248559900246eb4e7ceebd"
|
||||||
|
add_txt "_dmarc.$$ZONE" "v=DMARC1" "v=DMARC1; p=quarantine; pct=100; fo=1; ri=3600; sp=quarantine; adkim=r; aspf=r; rua=mailto:dmarc@viktorbarzin.me,mailto:adb84997@inbox.ondmarc.com; ruf=mailto:dmarc@viktorbarzin.me,mailto:adb84997@inbox.ondmarc.com,mailto:postmaster@viktorbarzin.me;"
|
||||||
|
add_txt "mail._domainkey.$$ZONE" "v=DKIM1" "v=DKIM1; h=sha256; k=rsa; p=MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAs9XHeFBKhUAEJSikXx+P49Q3nEBbnaSpn6h/9TqIhKaZWSVa2uGUGYQieNdon7DEJZ0VFo0Tvm3/UFsy2qF7ZmF+E/+N8EmkcPrMlxgJT281dpk5DxrZ+kbzw/DosfHH71K6vCLB4rSexzxJHaAx0AUddI3bFUJGjMgCXXCMZF+p8YCx+DDGPIXz2FOTtlJlR7aeZ2xXavwE/lBfI3MLnsq7X+GhPjQEax070nndOdZI0S8HpZkVxdGWl1N2Ec6LukYm2RiUkEMMQHSYX7WF3JBc+CGqUyd706Iy/5oeC3UGwZSM2uLkrp8YBjmw/h1rAeyv/ITt6ZXraP/cIMRiVQIDAQAB"
|
||||||
|
if ! echo "$$ZONE_DUMP" | grep -q '"type":"MX"'; then
|
||||||
|
R=$$(curl -sf -G "$$TECH_API/api/zones/records/add" --data-urlencode "token=$$TOKEN" --data-urlencode "zone=$$ZONE" --data-urlencode "domain=$$ZONE" --data-urlencode "type=MX" --data-urlencode "exchange=mail.viktorbarzin.me" --data-urlencode "preference=1" --data-urlencode "ttl=3600") || true
|
||||||
|
echo "$$R" | grep -q '"status":"ok"' && echo "mail-auth: added MX" || echo "mail-auth: FAILED MX -- $$R"
|
||||||
|
else
|
||||||
|
echo "mail-auth: MX present"
|
||||||
|
fi
|
||||||
|
|
||||||
# Pin the .lan ingress anchor A record to the LIVE Traefik LB IP.
|
# Pin the .lan ingress anchor A record to the LIVE Traefik LB IP.
|
||||||
# *.viktorbarzin.lan ingress hosts CNAME to ingress.viktorbarzin.lan,
|
# *.viktorbarzin.lan ingress hosts CNAME to ingress.viktorbarzin.lan,
|
||||||
# so a Traefik LB IP move that misses the .lan zone silently breaks
|
# so a Traefik LB IP move that misses the .lan zone silently breaks
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue