From 6a75ed4809223308979e93cc7c640097b009e21f Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sun, 19 Apr 2026 00:20:54 +0000 Subject: [PATCH] [mailserver] Add targeted retention for spam@ mailbox ## Context The @viktorbarzin.me catch-all routes to spam@viktorbarzin.me. The mailbox had no retention policy. On 2026-04-18 it held 519 messages consuming 43 MiB. Without a policy, the only brake on growth was manual deletion, which has not been happening - hence the bd task. Viktor's explicit constraint when filing code-oy4: DO NOT blind age-expunge. We need targeted retention that keeps genuine forwarded human mail for a long time while shedding the recurring-newsletter cruft that dominates the byte count. ## Profile findings (2026-04-18, verified on the live pod) Total: 519 messages, 43 MiB, 0 in new/, 0 in tmp/. Top senders by volume: 138 dan@tldrnewsletter.com 51 hi@ratepunk.com 40 uber@uber.com 35 truenas@viktorbarzin.me 19 ubereats@uber.com 15 hello@travel.jacksflightclub.com 12 chris@chriswillx.com 10 me@viktorbarzin.me Top senders by storage bytes: 8,176,481 dan@tldrnewsletter.com (19 % of 43 MiB alone) 2,866,104 uber@uber.com 2,207,458 noreply@mail.selfh.st 2,066,094 hi@ratepunk.com 1,675,435 ubereats@uber.com Age distribution: 97 % older than 14 days (502 / 519) 23 % older than 90 days (121 / 519) Automated-sender markers: 66 % carry List-Unsubscribe: (342 / 519) 4 % carry Precedence: bulk|list|junk ( 21 / 519) 34 % carry neither marker (= human-ish tail) (177 / 519) Combined "automated AND >14d": 328 messages -> target of rule 1. ## Retention strategy Signed off by Viktor 2026-04-18. Two rules, both delete-leaf: 1. Older than 14 days AND header matches one of: - `^List-Unsubscribe:` - `^Precedence:\s*(bulk|list|junk)` - `^Auto-Submitted:\s*auto-` -> DELETE. Rationale: these markers are the RFC-agreed indicators of bulk / robotic senders. A 14-day window still lets genuine subscription alerts (delivery, flight, calendar invite) come to attention. 2. 
Older than 90 days AND no automated marker at all -> DELETE. Rationale: these are long-tail forwards from real people to the catch-all. 90 days is deliberately generous - I would rather leak bytes than lose Viktor's personal correspondence. 3. Everything else -> KEEP (recent traffic, or aged human tail younger than 90d). ## Implementation A `kubernetes_cron_job_v1.spam_retention` running every 4h (at :17 past) that `kubectl exec`s a Python retention script into the mailserver pod. Why kubectl exec and not a sibling CronJob with the Maildir mounted: mailserver-data-encrypted is a RWO volume held by the mailserver pod. A sibling would fail to attach. The nextcloud-watchdog pattern in stacks/nextcloud/main.tf already solves this for a similar "interact with the live pod on a schedule" shape. Mirrored here with its own SA + Role + RoleBinding scoped to list/get pods and create pods/exec in the mailserver namespace only. Why Python and not pure shell: POSIX `find + stat + awk` struggles with the header-scan-up-to-blank-line rule, and `stat -c` is Linux-GNU-specific anyway. The script reads each message's first 64 KiB, stops at the first blank line, scans headers only, then checks mtime. The CronJob streams the Python source via `kubectl exec -i ... -- python3 -`, feeding the script on stdin from a quoted heredoc. [Text lost in extraction here: the rest of this section and the opening verification steps, the last of which reads the job log.] -> expected tail: spam_retention_scanned_total <n> spam_retention_auto_deleted_total <n> spam_retention_human_deleted_total <n> spam_retention_kept_total <n> spam_retention_errors_total 0 Retention pass complete 3. Confirm mailbox shrunk: `kubectl -n mailserver exec deploy/mailserver -c docker-mailserver \ -- du -sh /var/mail/viktorbarzin.me/spam/` -> expected: well below 43 MiB within one run (bulk rule alone purges ~328 messages per the profile numbers above). 4. Confirm IMAP reflects the deletions: `kubectl -n mailserver exec deploy/mailserver -c docker-mailserver \ -- doveadm mailbox status -u spam@viktorbarzin.me messages INBOX/spam` -> expected: message count dropped accordingly. 5. 
4 hours later, confirm the next scheduled run logs a much smaller scan count and 0 deletions (nothing new crossed the threshold). Closes: code-oy4 Co-Authored-By: Claude Opus 4.7 (1M context) --- stacks/mailserver/modules/mailserver/main.tf | 219 +++++++++++++++++++ 1 file changed, 219 insertions(+) diff --git a/stacks/mailserver/modules/mailserver/main.tf b/stacks/mailserver/modules/mailserver/main.tf index d85b0fdd..43e7d5d2 100644 --- a/stacks/mailserver/modules/mailserver/main.tf +++ b/stacks/mailserver/modules/mailserver/main.tf @@ -1133,3 +1133,222 @@ resource "kubernetes_cron_job_v1" "roundcube-backup" { } } + +# ============================================================================= +# Spam mailbox targeted retention (code-oy4) +# +# The @viktorbarzin.me catch-all routes to spam@viktorbarzin.me. Unbounded +# growth (~43 MiB baseline on 2026-04-18, 519 messages, top sender +# tldrnewsletter.com = 138 msgs / 8.2 MiB) makes it painful to triage. +# Profile (2026-04-18): +# - 502/519 messages older than 14 days (97 %) +# - 342/519 carry List-Unsubscribe: (66 %) +# - 21/519 carry Precedence: bulk ( 4 %) +# - 177/519 carry neither marker (= human-ish, 34 %) +# +# Strategy (user-signed-off 2026-04-18, do NOT blind-age-expunge): +# - Messages older than 14 days carrying List-Unsubscribe OR +# Precedence: bulk|list|junk OR Auto-Submitted: auto-* -> DELETE +# - Messages older than 90 days with no automated-sender marker +# -> DELETE (long-tail human forwards) +# - Everything else -> KEEP +# +# Implementation: kubectl exec into the mailserver pod because the +# Maildir lives on a RWO encrypted PVC; a sibling CronJob would fail to +# attach the volume while the mailserver pod holds it. Pattern mirrors +# the `nextcloud-watchdog` in stacks/nextcloud/main.tf. 
+# =============================================================================
+# Identity the retention CronJob runs as; its rights come from the Role below.
+resource "kubernetes_service_account" "spam_retention" {
+  metadata {
+    name      = "spam-retention"
+    namespace = kubernetes_namespace.mailserver.metadata[0].name
+  }
+}
+
+# Namespace-scoped permissions: look up the mailserver pod and exec into it.
+resource "kubernetes_role" "spam_retention" {
+  metadata {
+    name      = "spam-retention"
+    namespace = kubernetes_namespace.mailserver.metadata[0].name
+  }
+  rule {
+    api_groups = [""]
+    resources  = ["pods"]
+    verbs      = ["list", "get"]
+  }
+  rule {
+    api_groups = [""]
+    resources  = ["pods/exec"]
+    verbs      = ["create"]
+  }
+}
+
+# Grants the Role to the spam-retention ServiceAccount.
+resource "kubernetes_role_binding" "spam_retention" {
+  metadata {
+    name      = "spam-retention"
+    namespace = kubernetes_namespace.mailserver.metadata[0].name
+  }
+  role_ref {
+    api_group = "rbac.authorization.k8s.io"
+    kind      = "Role"
+    name      = kubernetes_role.spam_retention.metadata[0].name
+  }
+  subject {
+    kind      = "ServiceAccount"
+    name      = kubernetes_service_account.spam_retention.metadata[0].name
+    namespace = kubernetes_namespace.mailserver.metadata[0].name
+  }
+}
+
+# Runs at minute 17 of every 4th hour: finds the mailserver pod, streams the
+# Python retention script into it over `kubectl exec`, then asks Dovecot to
+# resync the mailbox. The Job fails when the script reports errors.
+resource "kubernetes_cron_job_v1" "spam_retention" {
+  metadata {
+    name      = "spam-retention"
+    namespace = kubernetes_namespace.mailserver.metadata[0].name
+  }
+  spec {
+    schedule                      = "17 */4 * * *"
+    concurrency_policy            = "Forbid"
+    successful_jobs_history_limit = 2
+    failed_jobs_history_limit     = 3
+    starting_deadline_seconds     = 300
+    job_template {
+      metadata {}
+      spec {
+        active_deadline_seconds    = 600
+        backoff_limit              = 1
+        ttl_seconds_after_finished = 600
+        template {
+          metadata {}
+          spec {
+            service_account_name = kubernetes_service_account.spam_retention.metadata[0].name
+            restart_policy       = "Never"
+            container {
+              name = "spam-retention"
+              # NOTE(review): unpinned `latest` tag; consider pinning a version.
+              image = "bitnami/kubectl:latest"
+              # The heredoc body must not contain Terraform template sequences
+              # (dollar-brace / percent-brace); plain $VAR and $(...) are safe.
+              command = ["/bin/bash", "-c", <<-EOF
+                set -euo pipefail
+
+                # custom-columns prints nothing (exit 0) when no pod matches,
+                # so the guard below is reachable; an indexed jsonpath would
+                # make kubectl fail first. Only Running pods can be exec'd.
+                POD=$(kubectl -n mailserver get pods -l app=mailserver \
+                  --field-selector=status.phase=Running \
+                  --no-headers -o custom-columns=NAME:.metadata.name | head -n 1)
+                if [ -z "$POD" ]; then
+                  echo "ERROR: no mailserver pod found" >&2
+                  exit 1
+                fi
+                echo "Targeting pod $POD"
+
+                # Stream the retention script to python3 inside the mailserver
+                # container via stdin. Keeping the logic in Python avoids the
+                # POSIX-sh/awk fragility around stat(1) differences and header
+                # matching. The exit status is captured rather than left to
+                # `set -e`, so the resync below still runs after a pass that
+                # deleted some messages and then hit an error.
+                RC=0
+                kubectl -n mailserver exec -i "$POD" -c docker-mailserver -- python3 - <<'PYEOF' || RC=$?
+                import os
+                import re
+                import stat
+                import sys
+                import time
+
+                SPAM = "/var/mail/viktorbarzin.me/spam/cur"
+                # Retention thresholds, in days, one per rule.
+                AUTOMATED_MAX_AGE_DAYS = 14
+                HUMAN_MAX_AGE_DAYS = 90
+                HEADER_SCAN_BYTES = 65536
+
+                AUTO_PATTERNS = (
+                    re.compile(rb"^list-unsubscribe:", re.IGNORECASE),
+                    re.compile(rb"^precedence:\s*(bulk|list|junk)", re.IGNORECASE),
+                    re.compile(rb"^auto-submitted:\s*auto-", re.IGNORECASE),
+                )
+                # Earliest blank line in either line-ending style. Trying CRLF
+                # first would let body text leak into the header scan of an
+                # LF-delimited message whose body contains a CRLF blank line.
+                HEADER_END = re.compile(rb"\r?\n\r?\n")
+
+
+                def is_automated(path):
+                    """Return True if a header line matches an AUTO_PATTERNS entry.
+
+                    Only the first HEADER_SCAN_BYTES bytes are read, and only
+                    the part before the first blank line is scanned.
+
+                    Raises OSError if the file cannot be read, so the caller
+                    can keep the message rather than treat it as human mail.
+                    """
+                    with open(path, "rb") as fh:
+                        head = fh.read(HEADER_SCAN_BYTES)
+                    hdr = HEADER_END.split(head, maxsplit=1)[0]
+                    return any(
+                        pat.search(line)
+                        for line in hdr.splitlines()
+                        for pat in AUTO_PATTERNS
+                    )
+
+
+                if not os.path.isdir(SPAM):
+                    print(f"SKIP: {SPAM} does not exist")
+                    sys.exit(0)
+
+                now = time.time()
+                scanned = auto_deleted = human_deleted = kept = errors = 0
+
+                for entry in sorted(os.listdir(SPAM)):
+                    path = os.path.join(SPAM, entry)
+                    try:
+                        st = os.stat(path)
+                        if not stat.S_ISREG(st.st_mode):
+                            continue
+                        automated = is_automated(path)
+                    except FileNotFoundError:
+                        # Maildir keeps message flags in the file name, so the
+                        # server may rename or expunge a message mid-pass.
+                        # That is not an error; a later run sees the new name.
+                        continue
+                    except OSError:
+                        # Unreadable: keep the message and report, never delete
+                        # something that could not be classified.
+                        errors += 1
+                        continue
+                    scanned += 1
+                    age_days = (now - st.st_mtime) / 86400
+                    max_age = AUTOMATED_MAX_AGE_DAYS if automated else HUMAN_MAX_AGE_DAYS
+                    if age_days <= max_age:
+                        kept += 1
+                        continue
+                    try:
+                        os.unlink(path)
+                    except FileNotFoundError:
+                        # Already gone (removed or renamed since the stat).
+                        continue
+                    except OSError:
+                        errors += 1
+                        continue
+                    if automated:
+                        auto_deleted += 1
+                    else:
+                        human_deleted += 1
+
+                # Metric lines (Pushgateway-compatible format). The parent
+                # kubectl wrapper logs them for now; Pushgateway integration
+                # is a follow-up.
+                print(f"spam_retention_scanned_total {scanned}")
+                print(f"spam_retention_auto_deleted_total {auto_deleted}")
+                print(f"spam_retention_human_deleted_total {human_deleted}")
+                print(f"spam_retention_kept_total {kept}")
+                print(f"spam_retention_errors_total {errors}")
+
+                sys.exit(1 if errors else 0)
+                PYEOF
+
+                # Refresh Dovecot index so IMAP sees the deletions immediately.
+                # The script prunes <maildir root>/cur, which in Maildir layout
+                # is the account's INBOX, so that is the mailbox to resync.
+                # NOTE(review): the mask used to be INBOX/spam (a child
+                # folder); confirm against the live mail_location/namespace.
+                # Best-effort: a failure is reported but does not fail the Job.
+                kubectl -n mailserver exec "$POD" -c docker-mailserver -- \
+                  doveadm force-resync -u spam@viktorbarzin.me INBOX \
+                  || echo "WARN: doveadm force-resync failed" >&2
+
+                if [ "$RC" -ne 0 ]; then
+                  echo "ERROR: retention script exited with status $RC" >&2
+                  exit "$RC"
+                fi
+                echo "Retention pass complete"
+                EOF
+              ]
+              resources {
+                requests = {
+                  cpu    = "10m"
+                  memory = "32Mi"
+                }
+                limits = {
+                  memory = "128Mi"
+                }
+              }
+            }
+            dns_config {
+              option {
+                name  = "ndots"
+                value = "2"
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  lifecycle {
+    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
+    ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
+  }
+}