From ba697b02a274b0eca7a5978c8488e82e090d6e9c Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sun, 19 Apr 2026 12:07:47 +0000 Subject: [PATCH] =?UTF-8?q?[mailserver]=20Phase=202-3=20=E2=80=94=20pfSens?= =?UTF-8?q?e=20HAProxy=20bootstrap=20+=20runbook=20[ci=20skip]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Context (bd code-yiu) Phase 2 (HAProxy on pfSense) and Phase 3 (persist config in pfSense XML so it lives in the nightly backup) of the PROXY-v2 migration. Test path only — listens on pfSense 10.0.20.1:2525 → k8s node NodePort :30125 → pod :2525 postscreen. Real client IP verified in maillog (`postfix/smtpd-proxy/postscreen: CONNECT from [10.0.10.10]:...`), Phase 1a container plumbing is already live (commit ef75c02f). pfSense HAProxy config lives in `/cf/conf/config.xml` under ``. That file is captured daily by `scripts/daily-backup.sh` (scp → `/mnt/backup/pfsense/config-YYYYMMDD.xml`) and synced offsite to Synology. No new backup wiring needed — this commit documents the fact + adds the reproducer script. ## This change Two files, both additive: 1. `scripts/pfsense-haproxy-bootstrap.php` — idempotent PHP script that edits pfSense config.xml to add: - Backend pool `mailserver_nodes` with 4 k8s workers on NodePort 30125, `send-proxy-v2`, TCP health-check every 120000 ms (2 min). - Frontend `mailserver_proxy_test` listening on pfSense 10.0.20.1:2525 in TCP mode, forwarding to the pool. Uses `haproxy_check_and_run()` to regenerate `/var/etc/haproxy/haproxy.cfg` and reload HAProxy. Removes existing items with the same name before adding, so repeat runs converge on declared state. 2. `docs/runbooks/mailserver-pfsense-haproxy.md` — ops runbook covering current state, validation, bootstrap/restore, health checks, phase roadmap, and known warts (health-check noise + bind-address templating). ## What is NOT in this change - Phase 4 (NAT rdr flip for :25 from `` → HAProxy) — deferred. - Phase 5 (extend to 465/587/993 with alt listeners + Dovecot dual- inet_listener) — deferred. - Terraform for pfSense HAProxy pkg install — not possible (no Terraform provider for pfSense pkg management). Runbook documents the manual `pkg install` command. ## Test Plan ### Automated ``` $ ssh admin@10.0.20.1 'pgrep -lf haproxy; sockstat -l | grep :2525' 64009 /usr/local/sbin/haproxy -f /var/etc/haproxy/haproxy.cfg -p /var/run/haproxy.pid -D www haproxy 64009 5 tcp4 *:2525 *:* $ ssh admin@10.0.20.1 "echo 'show servers state' | socat /tmp/haproxy.socket stdio" \ | awk 'NR>1 {print $4, $6}' node1 2 node2 2 node3 2 node4 2 # all UP $ python3 -c " import socket; s=socket.socket(); s.settimeout(10) s.connect(('10.0.20.1', 2525)) print(s.recv(200).decode()) s.send(b'EHLO persist-test.example.com\r\n') print(s.recv(500).decode()) s.send(b'QUIT\r\n'); s.close()" 220-mail.viktorbarzin.me ESMTP ... 250-mail.viktorbarzin.me 250-SIZE 209715200 ... 221 2.0.0 Bye $ kubectl logs -c docker-mailserver deployment/mailserver -n mailserver --tail=50 \ | grep smtpd-proxy.*CONNECT postfix/smtpd-proxy/postscreen: CONNECT from [10.0.10.10]:33010 to [10.0.20.1]:2525 ``` Real client IP `[10.0.10.10]` visible (not the k8s-node IP after kube-proxy SNAT) → PROXY-v2 roundtrip confirmed. ### Manual Verification Trigger a pfSense reboot; after boot, HAProxy should auto-restart from the now-persisted config (`yes` in XML). Connection test above should still work. ## Reproduce locally 1. `scp infra/scripts/pfsense-haproxy-bootstrap.php admin@10.0.20.1:/tmp/` 2. `ssh admin@10.0.20.1 'php /tmp/pfsense-haproxy-bootstrap.php'` → rc=OK 3. `python3 -c '...' ` SMTP roundtrip test above. --- docs/runbooks/mailserver-pfsense-haproxy.md | 150 ++++++++++++++++++++ scripts/pfsense-haproxy-bootstrap.php | 117 +++++++++++++++ 2 files changed, 267 insertions(+) create mode 100644 docs/runbooks/mailserver-pfsense-haproxy.md create mode 100644 scripts/pfsense-haproxy-bootstrap.php diff --git a/docs/runbooks/mailserver-pfsense-haproxy.md b/docs/runbooks/mailserver-pfsense-haproxy.md new file mode 100644 index 00000000..3a57d645 --- /dev/null +++ b/docs/runbooks/mailserver-pfsense-haproxy.md @@ -0,0 +1,150 @@ +# pfSense HAProxy for Mailserver — Runbook + +Last updated: 2026-04-19 + +## What & why + +External mail traffic (SMTP/IMAP) requires **real client IP visibility** for +CrowdSec + Postfix rate-limiting. MetalLB cannot inject PROXY-protocol +headers (see [`mailserver-proxy-protocol.md`](./mailserver-proxy-protocol.md)), +so pfSense runs a small HAProxy that: + +1. Listens on the pfSense VIP, +2. Forwards each connection to a k8s node's NodePort, +3. Injects PROXY-v2 framing so Postfix/Dovecot see the original client IP, +4. TCP health-checks every worker — any node can serve. + +Corresponding k8s-side setup lives in `stacks/mailserver/modules/mailserver/`: +- ConfigMap `mailserver-user-patches` → `user-patches.sh` appends alt + `master.cf` service on port 2525 with + `postscreen_upstream_proxy_protocol=haproxy`. +- Service `mailserver-proxy` → NodePort 30125 → targetPort 2525 → + `externalTrafficPolicy: Cluster`. + +bd: `code-yiu`. + +## Current state (Phase 3 — TEST PATH) + +``` + INTERNET + │ + (unchanged — still MetalLB path) + ↓ + WAN:25/465/587/993 ─┐ + │ TEST PATH ↓ + │ (pfSense HAProxy, port 2525 only) + pfSense NAT rdr │ + ↓ ↓ + alias (= 10.0.20.202) HAProxy on pfSense (10.0.20.1:2525) + ↓ ↓ + MetalLB VIP 10.0.20.202 k8s-node:30125 (NodePort, ETP: Cluster) + (ETP:Local, kube-proxy DNAT) ↓ + ↓ kube-proxy (SNAT — IP lost here, recovered by PROXY-v2) + mailserver pod ↓ + stock :25/:465/:587/:993 mailserver pod :2525 postscreen (PROXY-v2) +``` + +Nothing production flips to HAProxy yet; all real traffic still uses the +MetalLB LB IP path. To validate the HAProxy path: + +```sh +# From any k8s VLAN host: +python3 -c " +import socket; s=socket.socket(); s.connect(('10.0.20.1', 2525)) +print(s.recv(200).decode()) +s.send(b'EHLO testclient\r\n') +print(s.recv(500).decode()) +s.send(b'QUIT\r\n'); s.close()" + +# Then check mailserver logs for CONNECT from [YOUR-IP]: +kubectl logs -c docker-mailserver deployment/mailserver -n mailserver --tail=20 | grep smtpd-proxy +``` + +## Bootstrap / restore from scratch + +Config lives in pfSense `/cf/conf/config.xml` under +``. Backed up nightly to +`/mnt/backup/pfsense/config-YYYYMMDD.xml` by `scripts/daily-backup.sh`, then +Synology. To rebuild from source of truth (git): + +```sh +scp infra/scripts/pfsense-haproxy-bootstrap.php admin@10.0.20.1:/tmp/ +ssh admin@10.0.20.1 'php /tmp/pfsense-haproxy-bootstrap.php' +``` + +The script is idempotent — re-runs reset the mailserver frontend + backend to +the declared state. + +Expected output: +``` +haproxy_check_and_run rc=OK +messages: ... +``` + +Verify: +```sh +ssh admin@10.0.20.1 "pgrep -lf haproxy; sockstat -l | grep ':2525'" +# 64009 /usr/local/sbin/haproxy -f /var/etc/haproxy/haproxy.cfg ... +# www haproxy 64009 5 tcp4 *:2525 *:* +``` + +## Operations + +### Change backend k8s node IPs + +Edit `infra/scripts/pfsense-haproxy-bootstrap.php` → `foreach` array of +`[name, address]`, re-run via the bootstrap command above. Don't hand-edit +`/var/etc/haproxy/haproxy.cfg` — it is regenerated from XML on every apply. + +### Check health of backends + +```sh +ssh admin@10.0.20.1 "echo 'show servers state' | socat /tmp/haproxy.socket stdio" +``` +`srv_op_state=2` means UP, `0` means DOWN. + +### View live HAProxy stats (WebUI) + +`https://pfsense.viktorbarzin.me` → Services → HAProxy → Stats + +### Reload after config.xml edit + +```sh +ssh admin@10.0.20.1 'pfSsh.php playback svc restart haproxy' +``` + +### Restore from backup + +pfSense config backup is a plain XML file: +``` +/mnt/backup/pfsense/config-YYYYMMDD.xml # sda host copy (1.1TB RAID1) +/volume1/Backup/Viki/pve-backup/pfsense/... # Synology offsite +``` + +Full restore: pfSense WebUI → Diagnostics → Backup & Restore → Upload that +`config.xml`. The `` section is included. + +## Phase roadmap (bd code-yiu) + +| Phase | Status | Description | +|---|---|---| +| 1a | ✅ done (commit `ef75c02f`) | k8s alt listener `:2525` + `mailserver-proxy` NodePort | +| 2 | ✅ done (2026-04-19) | pfSense HAProxy installed + test config on `:2525` | +| 3 | ✅ done (2026-04-19) | HAProxy config persisted to pfSense `config.xml` (this runbook + `pfsense-haproxy-bootstrap.php`) | +| 4 | not yet | Flip pfSense NAT rdr for `:25` from `` alias → HAProxy VIP. Requires atomic cutover. | +| 5 | not yet | Extend to ports 465/587/993: add alt container listeners (4465/5587/10993), add Dovecot `haproxy = yes` on extra inet_listener, expand HAProxy frontends, flip NAT. | +| 6 | not yet | Observe 48h, decommission MetalLB LB path (downgrade mailserver Service from LoadBalancer to ClusterIP, free `10.0.20.202`). | + +## Known warts + +- HAProxy TCP health-check with `send-proxy-v2` + short `inter` floods + postscreen with `getpeername: Transport endpoint not connected` warnings + every check cycle. Mitigated with `inter 120000` (2 min). To reduce + further, switch to `option smtpchk` — but that requires a separate + non-PROXY health-check port on the pod (not done yet). +- Frontend binds on all pfSense interfaces (`bind :2525`) rather than just + `10.0.20.1:2525`. `` is set in XML but pfSense templates it as + port-only. Low concern while port 2525 is a test port; tighten once + promoted to real ports (25/465/587/993). +- k8s-node5 doesn't exist — cluster has master + 4 workers. Backend pool + capped at 4 servers. diff --git a/scripts/pfsense-haproxy-bootstrap.php b/scripts/pfsense-haproxy-bootstrap.php new file mode 100644 index 00000000..0a0dc094 --- /dev/null +++ b/scripts/pfsense-haproxy-bootstrap.php @@ -0,0 +1,117 @@ +`. That file IS picked up by the nightly +// `daily-backup` on the PVE host (see `scripts/daily-backup.sh` → `scp +// root@10.0.20.1:/cf/conf/config.xml`) and synced to Synology. This script +// is the canonical reproducer: run it to rebuild the pfSense HAProxy config +// from scratch (DR restore, fresh pfSense install, etc.). +// +// WHAT IT BUILDS +// Backend pool `mailserver_nodes`: 4 k8s workers on NodePort 30125 with +// `send-proxy-v2` + TCP health-check every 120s. +// Frontend `mailserver_proxy_test`: listens on 10.0.20.1:2525, TCP mode, +// forwards to the pool above. +// +// USAGE (on pfSense host, via SSH as admin) +// scp infra/scripts/pfsense-haproxy-bootstrap.php admin@10.0.20.1:/tmp/ +// ssh admin@10.0.20.1 'php /tmp/pfsense-haproxy-bootstrap.php' +// +// IDEMPOTENCY +// Removes any existing entries named `mailserver_nodes` / `mailserver_proxy_test` +// before re-adding, so repeat runs are safe and behave as reset-to-declared. + +require_once('/etc/inc/config.inc'); +require_once('/usr/local/pkg/haproxy/haproxy.inc'); +require_once('/usr/local/pkg/haproxy/haproxy_utils.inc'); + +global $config; +parse_config(true); + +if (!is_array($config['installedpackages']['haproxy'])) { + $config['installedpackages']['haproxy'] = []; +} +$h = &$config['installedpackages']['haproxy']; + +$h['enable'] = 'yes'; +$h['maxconn'] = '1000'; + +// ── Backend pool ──────────────────────────────────────────────────────── +if (!is_array($h['ha_pools'])) $h['ha_pools'] = ['item' => []]; +if (!is_array($h['ha_pools']['item'])) $h['ha_pools']['item'] = []; +$h['ha_pools']['item'] = array_values(array_filter( + $h['ha_pools']['item'], + fn($p) => ($p['name'] ?? '') !== 'mailserver_nodes' +)); + +$servers = []; +foreach ([ + ['k8s-node1', '10.0.20.101'], + ['k8s-node2', '10.0.20.102'], + ['k8s-node3', '10.0.20.103'], + ['k8s-node4', '10.0.20.104'], +] as $n) { + $servers[] = [ + 'name' => $n[0], + 'address' => $n[1], + 'port' => '30125', + 'weight' => '10', + 'ssl' => '', + // check every 2 minutes to avoid flooding postscreen with + // send-proxy-v2 + immediate close connections (see bd code-yiu notes). + 'checkinter' => '120000', + 'advanced' => 'send-proxy-v2', + 'status' => 'active', + ]; +} + +$h['ha_pools']['item'][] = [ + 'name' => 'mailserver_nodes', + 'balance' => 'roundrobin', + 'check_type' => 'TCP', + 'checkinter' => '120000', + 'retries' => '3', + 'ha_servers' => ['item' => $servers], + 'advanced_bind' => '', + 'persist_cookie_enabled' => '', + 'transparent_clientip' => '', + 'advanced' => '', +]; + +// ── Frontend (pfSense "ha_backends") ──────────────────────────────────── +if (!is_array($h['ha_backends'])) $h['ha_backends'] = ['item' => []]; +if (!is_array($h['ha_backends']['item'])) $h['ha_backends']['item'] = []; +$h['ha_backends']['item'] = array_values(array_filter( + $h['ha_backends']['item'], + fn($f) => ($f['name'] ?? '') !== 'mailserver_proxy_test' +)); + +$h['ha_backends']['item'][] = [ + 'name' => 'mailserver_proxy_test', + 'descr' => 'code-yiu Phase 3 test — PROXY v2 to k8s mailserver NodePort 30125', + 'status' => 'active', + 'secondary' => '', + 'type' => 'tcp', + 'a_extaddr' => ['item' => [[ + 'extaddr' => '10.0.20.1', + 'extaddr_port' => '2525', + 'extaddr_ssl' => '', + 'extaddr_advanced' => '', + ]]], + 'backend_serverpool' => 'mailserver_nodes', + 'ha_acls' => '', + 'dontlognull'=> '', + 'httpclose' => '', + 'forwardfor' => '', + 'advanced' => '', +]; + +write_config('code-yiu: mailserver_proxy HAProxy frontend + backend (bootstrap)'); + +$messages = ''; +$rc = haproxy_check_and_run($messages, true); +echo 'haproxy_check_and_run rc=' . ($rc ? 'OK' : 'FAIL') . "\n"; +echo "messages: $messages\n";