From ef75c02f0d5245537dd60d829c85074f6c10054f Mon Sep 17 00:00:00 2001
From: Viktor Barzin
Date: Sun, 19 Apr 2026 11:52:49 +0000
Subject: [PATCH] =?UTF-8?q?[mailserver]=20Phase=201a=20=E2=80=94=20alt=20:?=
 =?UTF-8?q?2525=20postscreen=20listener=20+=20NodePort=20[ci=20skip]?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Context (bd code-yiu)

Toward replacing MetalLB ETP:Local + pod-speaker colocation with pfSense
HAProxy injecting PROXY v2 → mailserver. This commit lays the k8s-side
groundwork for port 25 only.

External SMTP flow post-cutover:

Client → pfSense WAN:25 → pfSense HAProxy (injects PROXY v2)
→ k8s-node:30125 (NodePort for mailserver-proxy Service, ETP:Cluster)
→ kube-proxy → pod :2525 (postscreen with
postscreen_upstream_proxy_protocol=haproxy) → real client IP recovered
from the PROXY header despite kube-proxy SNAT.

Internal clients (Roundcube, email-roundtrip-monitor) keep using the stock
:25 on the mailserver.svc ClusterIP — no PROXY required, zero regression.

## This change

- New `kubernetes_config_map.mailserver_user_patches` with a
  `user-patches.sh` script. docker-mailserver runs
  `/tmp/docker-mailserver/user-patches.sh` on startup; our script appends a
  `2525 postscreen` entry to `master.cf` with
  `-o postscreen_upstream_proxy_protocol=haproxy` and a 5s PROXY timeout.
  Sentinel-guarded for idempotency on in-place restart.
- New volume + volume_mount (mode 0755 via the ConfigMap volume's
  `default_mode`) wires the ConfigMap into the mailserver container.
- New container port spec for 2525 (informational; kube-proxy resolves
  targetPort by number anyway).
- New Service `mailserver-proxy` — NodePort type, ETP:Cluster, selector
  `app=mailserver`, port 25 → targetPort 2525 → fixed nodePort 30125.
  pfSense HAProxy's backend pool will be `<node-ip>:30125 check send-proxy-v2`.

The existing `mailserver` LoadBalancer Service (ETP:Local, 10.0.20.202,
ports 25/465/587/993) is untouched. Traffic still flows through it via the
pfSense NAT `` alias; this commit does not change routing.

## What is NOT in this change

- pfSense HAProxy install/config (Phase 2 — out-of-Terraform, runbook-managed)
- pfSense NAT rdr flip from `` → HAProxy VIP (Phase 4)
- 465/587/993 — scoped to port 25 first for proof of concept. Other ports
  get the same treatment (alt listeners 4465/5587/10993 + Service ports)
  once 25 is proven.
- Dovecot per-listener `haproxy = yes` — irrelevant until IMAP is migrated.

## Test Plan

### Automated (verified pre-commit)

```
$ kubectl rollout status deployment/mailserver -n mailserver
deployment "mailserver" successfully rolled out

$ kubectl exec -n mailserver -c docker-mailserver deployment/mailserver -- \
    postconf -M | grep '^2525'
2525       inet  n       -       y       -       1       postscreen \
    -o syslog_name=postfix/smtpd-proxy \
    -o postscreen_upstream_proxy_protocol=haproxy \
    -o postscreen_upstream_proxy_timeout=5s

$ kubectl exec -n mailserver -c docker-mailserver deployment/mailserver -- \
    ss -ltn | grep -E ':25\b|:2525'
LISTEN 0 100 0.0.0.0:2525 0.0.0.0:*
LISTEN 0 100 0.0.0.0:25   0.0.0.0:*

$ kubectl get svc -n mailserver mailserver-proxy
NAME               TYPE       CLUSTER-IP      PORT(S)        AGE
mailserver-proxy   NodePort   10.98.213.164   25:30125/TCP   93s

# Expected-to-fail probe (no PROXY header) → postscreen rejects
$ timeout 8 nc -v 10.0.20.101 30125
```

### Manual (post-apply)

1. Send a test message at the PROXY listener: `…:2525 --body "phase 1 test"`
   (a PROXY v2 probe sketch follows below).
2. In mailserver logs:
   `kubectl logs -n mailserver -c docker-mailserver deployment/mailserver | grep postfix/smtpd-proxy`
   — "connect from" shows the real public IP in the brackets, NOT the k8s
   node IP.
3. E2E probe CronJob keeps green (uses ClusterIP path, unaffected).
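Until pfSense HAProxy exists, step 1 can be exercised by hand-crafting the
PROXY v2 header HAProxy would send. A minimal sketch, not part of this
commit: 10.0.20.101 is the node IP from the nc probe above, and 203.0.113.7
is a made-up "real client" address to look for in the maillog.

```
#!/usr/bin/env python3
"""Hand-rolled PROXY protocol v2 probe for the :30125 NodePort (sketch)."""
import socket
import struct

NODE, NODE_PORT = "10.0.20.101", 30125   # node IP reused from the nc probe
FAKE_CLIENT = "203.0.113.7"              # assumption: pretend "real" client IP

# PROXY v2 header: 12-byte signature, then ver/cmd 0x21 (v2, PROXY),
# fam/proto 0x11 (TCP over IPv4), a 2-byte address block length (12 for
# IPv4), then source/destination addresses and ports in network byte order.
SIGNATURE = b"\r\n\r\n\x00\r\nQUIT\n"
header = SIGNATURE + bytes([0x21, 0x11]) + struct.pack(
    "!H4s4sHH",
    12,                              # length of the IPv4 address block
    socket.inet_aton(FAKE_CLIENT),   # claimed source (the "real" client)
    socket.inet_aton(NODE),          # claimed destination
    34567,                           # claimed source port
    25,                              # claimed destination port
)

with socket.create_connection((NODE, NODE_PORT), timeout=8) as sock:
    sock.sendall(header)
    print(sock.recv(1024).decode(errors="replace"))  # expect a 220 banner
```

After this, `postfix/smtpd-proxy` should log `connect from` with 203.0.113.7
rather than a node or kube-proxy address; without the header, postscreen
waits out `postscreen_upstream_proxy_timeout` and hangs up, which is what
the expected-to-fail nc probe above shows.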
## Reproduce locally

1. `kubectl get svc mailserver-proxy -n mailserver` → NodePort 30125 exists
2. `kubectl get cm mailserver-user-patches -n mailserver` → exists
3. `timeout 8 nc -v <node-ip> 30125` → postscreen rejects (no PROXY header);
   the whole checklist is scripted below
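The three checks lend themselves to one script. A sketch, assuming kubectl
is configured for the cluster; the node IP is a placeholder:

```
#!/usr/bin/env python3
"""Scripted 'Reproduce locally' checklist (sketch)."""
import socket
import subprocess

NODE_IP = "10.0.20.101"  # assumption: any node address reaches a NodePort

def kubectl(*args: str) -> str:
    return subprocess.check_output(("kubectl", "-n", "mailserver", *args), text=True)

# 1. The mailserver-proxy Service pins NodePort 30125.
node_port = kubectl("get", "svc", "mailserver-proxy",
                    "-o", "jsonpath={.spec.ports[0].nodePort}")
assert node_port == "30125", node_port

# 2. The user-patches ConfigMap exists (raises CalledProcessError otherwise).
kubectl("get", "cm", "mailserver-user-patches")

# 3. A bare connection gets no SMTP banner: postscreen waits for the PROXY
#    handshake, times out after postscreen_upstream_proxy_timeout, hangs up.
with socket.create_connection((NODE_IP, 30125), timeout=8) as sock:
    try:
        banner = sock.recv(1024)
    except socket.timeout:
        banner = b""
assert banner == b"", f"unexpected banner: {banner!r}"
print("reproduce-locally checks passed")
```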
+resource "kubernetes_service" "mailserver_proxy" { + metadata { + name = "mailserver-proxy" + namespace = kubernetes_namespace.mailserver.metadata[0].name + labels = { + app = "mailserver" + } + } + spec { + type = "NodePort" + external_traffic_policy = "Cluster" + selector = { + app = "mailserver" + } + port { + name = "smtp-proxy" + protocol = "TCP" + port = 25 + target_port = 2525 + node_port = 30125 + } + } +} + # ============================================================================= # E2E Email Roundtrip Monitor # Sends test email via Brevo API, verifies delivery via IMAP, pushes metrics @@ -728,17 +801,24 @@ except Exception as e: duration = time.time() - start print(f"ERROR: {e}") -# Push metrics to Pushgateway -metrics = f"""# HELP email_roundtrip_success Whether the last e2e email probe succeeded -# TYPE email_roundtrip_success gauge -email_roundtrip_success {success} -# HELP email_roundtrip_duration_seconds Duration of the last e2e email probe -# TYPE email_roundtrip_duration_seconds gauge -email_roundtrip_duration_seconds {duration:.2f} -# HELP email_roundtrip_last_success_timestamp Unix timestamp of last successful probe -# TYPE email_roundtrip_last_success_timestamp gauge -email_roundtrip_last_success_timestamp {int(time.time()) if success else 0} -""" +# Push metrics to Pushgateway. On failure we omit email_roundtrip_last_success_timestamp +# and POST (not PUT) so the prior successful timestamp is preserved — otherwise pushing 0 +# makes EmailRoundtripStale fire immediately alongside EmailRoundtripFailing. +metric_lines = [ + "# HELP email_roundtrip_success Whether the last e2e email probe succeeded", + "# TYPE email_roundtrip_success gauge", + f"email_roundtrip_success {success}", + "# HELP email_roundtrip_duration_seconds Duration of the last e2e email probe", + "# TYPE email_roundtrip_duration_seconds gauge", + f"email_roundtrip_duration_seconds {duration:.2f}", +] +if success: + metric_lines += [ + "# HELP email_roundtrip_last_success_timestamp Unix timestamp of last successful probe", + "# TYPE email_roundtrip_last_success_timestamp gauge", + f"email_roundtrip_last_success_timestamp {int(time.time())}", + ] +metrics = "\n".join(metric_lines) + "\n" UPTIME_KUMA_URL = "http://uptime-kuma.uptime-kuma.svc.cluster.local/api/push/hLtyRKgeZO?status=up&msg=OK&ping=" + str(int(duration)) def push_with_retry(label, func, url): @@ -765,7 +845,7 @@ def push_with_retry(label, func, url): pushgateway_ok = push_with_retry( "Pushgateway", - lambda: requests.put(PUSHGATEWAY, data=metrics, timeout=10), + lambda: requests.post(PUSHGATEWAY, data=metrics, timeout=10), PUSHGATEWAY, )