diff --git a/docs/runbooks/mailserver-pfsense-haproxy.md b/docs/runbooks/mailserver-pfsense-haproxy.md index 564554eb..329be214 100644 --- a/docs/runbooks/mailserver-pfsense-haproxy.md +++ b/docs/runbooks/mailserver-pfsense-haproxy.md @@ -12,7 +12,11 @@ so pfSense runs a small HAProxy that: 1. Listens on the pfSense VLAN20 IP (`10.0.20.1`) on all 4 mail ports, 2. Forwards each connection to a k8s node's NodePort with `send-proxy-v2`, 3. Injects PROXY v2 framing so Postfix/Dovecot see the original client IP, -4. TCP health-checks every k8s worker — any node can serve (ETP:Cluster). +4. TCP-checks every k8s worker via dedicated **non-PROXY healthcheck NodePorts** + (30145/30146/30147 → pod stock 25/465/587 listeners, no PROXY required). + This split path avoids the `smtpd_peer_hostaddr_to_sockaddr` fatal that + used to fire on every PROXY-aware health probe and throttled real client + connections. Corresponding k8s-side setup (`stacks/mailserver/modules/mailserver/`): @@ -23,14 +27,20 @@ Corresponding k8s-side setup (`stacks/mailserver/modules/mailserver/`): - `:5587` smtpd (alt :587 submission) with `smtpd_upstream_proxy_protocol=haproxy` - ConfigMap `mailserver.config` adds Dovecot `inet_listener imaps_proxy` on port 10993 with `haproxy = yes` and `haproxy_trusted_networks = 10.0.20.0/24`. 
-- Service `mailserver-proxy` (NodePort, ETP:Cluster) with 4 NodePorts: - - `port 25 → targetPort 2525 → nodePort 30125` - - `port 465 → targetPort 4465 → nodePort 30126` - - `port 587 → targetPort 5587 → nodePort 30127` - - `port 993 → targetPort 10993 → nodePort 30128` +- Service `mailserver-proxy` (NodePort, ETP:Cluster) — 4 PROXY data ports + + 3 non-PROXY healthcheck ports: + - Data (PROXY v2): + - `port 25 → targetPort 2525 → nodePort 30125` + - `port 465 → targetPort 4465 → nodePort 30126` + - `port 587 → targetPort 5587 → nodePort 30127` + - `port 993 → targetPort 10993 → nodePort 30128` + - Healthcheck (no PROXY, stock SMTP/SMTPS/Submission listeners): + - `port 2500 → targetPort 25 → nodePort 30145` (smtp-check) + - `port 4650 → targetPort 465 → nodePort 30146` (smtps-check) + - `port 5870 → targetPort 587 → nodePort 30147` (sub-check) - Service `mailserver` (ClusterIP) — unchanged stock ports 25/465/587/993 for intra-cluster clients (Roundcube pod, `email-roundtrip-monitor` - CronJob). These listeners are PROXY-free. + CronJob, book-search). These listeners are PROXY-free. bd: `code-yiu`. @@ -46,7 +56,9 @@ External mail (WAN) path — PROXY v2 │ │ NAT rdr → 10.0.20.1:{same} │ │ ▼ │ │ pfSense HAProxy (mode tcp, 4 frontends, 4 backend pools) │ -│ │ send-proxy-v2 + tcp-check inter 120000 │ +│ │ data: send-proxy-v2 → :{30125..30128} (PROXY-aware pod) │ +│ │ health: TCP-check → :{30145..30147} (no-PROXY pod) │ +│ │ inter 5000 │ │ ▼ │ │ k8s-node<1-4>:{30125..30128} ← any node (ETP:Cluster) │ │ │ kube-proxy SNAT (source IP lost on the wire) │ @@ -186,11 +198,18 @@ Full restore: pfSense WebUI → Diagnostics → Backup & Restore → Upload that ## Known warts -- HAProxy TCP health-check with `send-proxy-v2` generates `getpeername: - Transport endpoint not connected` warnings on postscreen every check cycle. - Mitigated with `inter 120000` (2 min). 
To reduce further, switch to - `option smtpchk` — but that requires a separate non-PROXY health-check - port on the pod (not done yet). +- ~~HAProxy TCP health-check with `send-proxy-v2` generates `getpeername: + Transport endpoint not connected` warnings on postscreen every check cycle.~~ + **Resolved 2026-05-05**: dedicated non-PROXY healthcheck NodePorts + (30145/30146/30147 → stock pod 25/465/587) added; HAProxy now checks + those, eliminating both the `getpeername` postscreen warnings and the + `smtpd_peer_hostaddr_to_sockaddr: ... Servname not supported` fatals + that were throttling smtpd respawns and causing ~50% client timeouts on + the public 587 path. `inter` dropped 120000 → 5000 (fast failover, no + log-spam concern). `option smtpchk` was tried but flapped against + postscreen (multi-line greet + DNSBL silence + anti-pre-greet detection + trip HAProxy's parser → L7RSP). Plain TCP check on the no-PROXY ports + is sufficient. - Frontend binds on all pfSense interfaces (`bind :25` instead of `10.0.20.1:25`). `` is set in XML but pfSense templates it port-only. Low concern in practice because WAN firewall rules plus the diff --git a/scripts/pfsense-haproxy-bootstrap.php b/scripts/pfsense-haproxy-bootstrap.php index 3834d852..5452b198 100644 --- a/scripts/pfsense-haproxy-bootstrap.php +++ b/scripts/pfsense-haproxy-bootstrap.php @@ -68,7 +68,35 @@ $NODES = [ ['k8s-node4', '10.0.20.104'], ]; -function build_pool(string $name, string $nodeport, array $nodes): array { +// Build a pool with optional split healthcheck path. +// +// $check_port: if non-null, HAProxy sends health probes to that NodePort +// (which Service `mailserver-proxy` maps to the pod's stock no-PROXY +// listener — see infra/stacks/mailserver/.../mailserver_proxy ports +// 30145/30146/30147). Real client traffic still goes to $nodeport with +// PROXY v2 framing. +// $check_type: 'TCP' for plain accept-on-port checks, 'ESMTP' for +// `option smtpchk EHLO $monitor_domain` (real SMTP banner+EHLO+250). 
+// +// Why split: smtpd-proxy587/4465 fatal on every PROXY-v2-aware health +// probe with `smtpd_peer_hostaddr_to_sockaddr: ... Servname not supported` +// — the daemon respawns get throttled by Postfix master and real clients +// land mid-respawn → 6s TCP timeout. Routing health probes to the stock +// no-PROXY port sidesteps the bug entirely while data path still gets +// PROXY v2 for CrowdSec/Postfix client-IP visibility. The HAProxy package +// has no `checkport` field, so `port N` is appended via the server's +// `advanced` string (HAProxy parses server keywords in any order). +function build_pool( + string $name, + string $nodeport, + array $nodes, + string $check_type = 'TCP', + ?string $check_port = null, + string $monitor_domain = '' +): array { + $advanced_check = $check_port !== null + ? "send-proxy-v2 port {$check_port}" + : 'send-proxy-v2'; $servers = []; foreach ($nodes as $n) { $servers[] = [ @@ -77,18 +105,19 @@ function build_pool(string $name, string $nodeport, array $nodes): array { 'port' => $nodeport, 'weight' => '10', 'ssl' => '', - // check every 2 min — send-proxy-v2 check + close generates - // noise on postscreen, not worth doing more often. - 'checkinter' => '120000', - 'advanced' => 'send-proxy-v2', + // 5s = sub-block-window failover when a NodePort goes sour. + // Safe to be aggressive once health probes don't fatal smtpd. + 'checkinter' => '5000', + 'advanced' => $advanced_check, 'status' => 'active', ]; } return [ 'name' => $name, 'balance' => 'roundrobin', - 'check_type' => 'TCP', - 'checkinter' => '120000', + 'check_type' => $check_type, + 'monitor_domain' => $monitor_domain, + 'checkinter' => '5000', 'retries' => '3', 'ha_servers' => ['item' => $servers], 'advanced_bind' => '', @@ -132,9 +161,28 @@ $h['ha_pools']['item'] = array_values(array_filter( $h['ha_pools']['item'][] = build_pool('mailserver_nodes', '30125', $NODES); // Production pools — one per mail port. 
-$h['ha_pools']['item'][] = build_pool('mailserver_nodes_smtp', '30125', $NODES); -$h['ha_pools']['item'][] = build_pool('mailserver_nodes_smtps', '30126', $NODES); -$h['ha_pools']['item'][] = build_pool('mailserver_nodes_sub', '30127', $NODES); +// +// All SMTP/SMTPS/Submission backends use plain TCP checks against +// dedicated non-PROXY healthcheck NodePorts (30145/30146/30147 → pod +// stock 25/465/587) so probes hit the no-PROXY listeners and avoid +// the smtpd_peer_hostaddr_to_sockaddr fatal that fires on PROXY-v2 +// LOCAL frames. Real client traffic still goes to 30125-30128 with +// PROXY v2 for client-IP visibility. +// +// We tried `option smtpchk EHLO` initially — it works on the plain +// `submission` daemon (587) but flaps the `postscreen` listener on +// port 25 (multi-line greet + DNSBL silence + anti-pre-greet +// detection makes HAProxy's simple smtpchk parser hit L7RSP). A +// plain TCP accept-on-port check is enough for both: HAProxy still +// gets fast failover when the listener actually goes away, and we +// stop triggering the Postfix fatal entirely. +// +// IMAPS stays on its existing TCP-check-with-PROXY-frame for now — +// Dovecot's PROXY parser doesn't show the same fatal pattern; adding +// a separate IMAP healthcheck path would require another svc port. 
+$h['ha_pools']['item'][] = build_pool('mailserver_nodes_smtp', '30125', $NODES, 'TCP', '30145'); +$h['ha_pools']['item'][] = build_pool('mailserver_nodes_smtps', '30126', $NODES, 'TCP', '30146'); +$h['ha_pools']['item'][] = build_pool('mailserver_nodes_sub', '30127', $NODES, 'TCP', '30147'); $h['ha_pools']['item'][] = build_pool('mailserver_nodes_imaps', '30128', $NODES); // ── Frontends ─────────────────────────────────────────────────────────── diff --git a/stacks/ebooks/main.tf b/stacks/ebooks/main.tf index 7500e579..c6978a05 100644 --- a/stacks/ebooks/main.tf +++ b/stacks/ebooks/main.tf @@ -785,8 +785,18 @@ resource "kubernetes_deployment" "book_search" { } } env { - name = "SMTP_HOST" - value = "mail.viktorbarzin.me" + name = "SMTP_HOST" + # Use intra-cluster ClusterIP path — bypasses pfSense HAProxy + + # PROXY v2 (the public path hairpins through HAProxy:587 → + # NodePort → pod :5587 where Postfix's smtpd-proxy587 daemon + # crashes ~50% of HAProxy healthchecks with + # `smtpd_peer_hostaddr_to_sockaddr: ... Servname not supported`, + # producing intermittent 6s TCP timeouts for clients that land + # mid-respawn). The ClusterIP service points to pod port 587 + # (stock submission daemon, no PROXY) and is rock-solid (12/12 + # in <31ms vs 6/12 timeouts on the public path). + # See docs/runbooks/mailserver-pfsense-haproxy.md. + value = "mailserver.mailserver.svc.cluster.local" } env { name = "SMTP_PORT" diff --git a/stacks/mailserver/modules/mailserver/main.tf b/stacks/mailserver/modules/mailserver/main.tf index dd9dd6bf..c3c33d26 100644 --- a/stacks/mailserver/modules/mailserver/main.tf +++ b/stacks/mailserver/modules/mailserver/main.tf @@ -733,6 +733,35 @@ resource "kubernetes_service" "mailserver_proxy" { target_port = 10993 node_port = 30128 } + # Dedicated non-PROXY healthcheck NodePorts. 
HAProxy on pfSense runs + # plain TCP checks against these stock pod ports (25/465/587, no PROXY) + # so health probes don't hit the smtpd_peer_hostaddr_to_sockaddr fatal + # that fires on PROXY-v2 LOCAL/AF_UNSPEC frames sent during checks. The + # data path (30125-30128 → 2525/4465/5587/10993) still gets PROXY v2 for + # real client IP visibility — only the healthcheck path is split off. + # See infra/scripts/pfsense-haproxy-bootstrap.php (`port N` server keyword) + # and docs/runbooks/mailserver-pfsense-haproxy.md. + port { + name = "smtp-check" + protocol = "TCP" + port = 2500 + target_port = 25 + node_port = 30145 + } + port { + name = "smtps-check" + protocol = "TCP" + port = 4650 + target_port = 465 + node_port = 30146 + } + port { + name = "sub-check" + protocol = "TCP" + port = 5870 + target_port = 587 + node_port = 30147 + } } }