diff --git a/broker_sync/providers/imap.py b/broker_sync/providers/imap.py
index 5564dd3..0b9bbb7 100644
--- a/broker_sync/providers/imap.py
+++ b/broker_sync/providers/imap.py
@@ -151,14 +151,41 @@ def _fetch_all(creds: ImapCreds) -> Iterator[bytes]:
         yield raw
 
 
-def fetch_activities(creds: ImapCreds) -> list[Activity]:
-    out: list[Activity] = []
-    ie_parsed = schwab_parsed = ie_skipped = skipped = 0
-    exclude = {
-        p.strip().lower()
+def _resolve_excluded_providers() -> set[str]:
+    """Return the set of providers the IMAP fetcher must skip.
+
+    Default-exclude list is structural — `invest-engine` is ALWAYS skipped
+    unless explicitly opted back in via `BROKER_SYNC_IMAP_INCLUDE_PROVIDERS`.
+    This protects against accidental re-ingestion via any code path that
+    doesn't set the cron's env (e.g. `kubectl run --rm`, devvm `poetry run`,
+    a sibling agent session). See post-mortem 2026-05-27 — the IMAP path
+    re-inserted 39 IE BUYs that had been deduped the previous day, because
+    the safety lived only on the cronjob spec.
+
+    Additional providers can be excluded via
+    `BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS`. `INCLUDE` always wins over
+    `EXCLUDE` and the default skip-list.
+    """
+    _DEFAULT_EXCLUDED = {"invest-engine", "invest_engine"}
+    extra = {
+        p.strip().lower().replace("_", "-")
         for p in os.environ.get("BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS", "").split(",")
         if p.strip()
     }
+    include = {
+        p.strip().lower().replace("_", "-")
+        for p in os.environ.get("BROKER_SYNC_IMAP_INCLUDE_PROVIDERS", "").split(",")
+        if p.strip()
+    }
+    # Canonicalise the default set under the same key normalisation.
+    canonical = {p.replace("_", "-") for p in _DEFAULT_EXCLUDED}
+    return (canonical | extra) - include
+
+
+def fetch_activities(creds: ImapCreds) -> list[Activity]:
+    out: list[Activity] = []
+    ie_parsed = schwab_parsed = ie_skipped = skipped = 0
+    exclude = _resolve_excluded_providers()
     for raw in _fetch_all(creds):
         try:
             msg = email.message_from_bytes(raw)
@@ -167,7 +194,7 @@ def fetch_activities(creds: ImapCreds) -> list[Activity]:
             continue
         sender = _extract_sender(msg)
         if sender in _IE_SENDERS or sender.endswith("@investengine.com"):
-            if "invest-engine" in exclude or "invest_engine" in exclude:
+            if "invest-engine" in exclude:
                 ie_skipped += 1
                 continue
             out.extend(ie_parser.parse_invest_engine_email(raw))
diff --git a/tests/providers/test_imap.py b/tests/providers/test_imap.py
index 0264b37..30b09d1 100644
--- a/tests/providers/test_imap.py
+++ b/tests/providers/test_imap.py
@@ -105,10 +105,15 @@ def test_non_ie_activities_passed_through_unchanged() -> None:
     assert routed[0].account_type is AccountType.GIA
 
 
-def test_exclude_invest_engine_skips_ie_emails(monkeypatch: MonkeyPatch) -> None:
-    """BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS=invest-engine should skip IE messages
-    so we don't duplicate IE buys already ingested via the bearer-token API path.
-    Schwab routing must remain unaffected."""
+def test_invest_engine_skipped_by_default(monkeypatch: MonkeyPatch) -> None:
+    """InvestEngine messages MUST be skipped by default, even with no env set.
+
+    Post-mortem 2026-05-27: any code path that doesn't set the cron's env
+    (e.g. `kubectl run --rm` or devvm `poetry run`) was re-importing IE
+    BUYs through this IMAP path. The opt-out env var was a foot-gun.
+    Invariant now: structural default skip; opt back in only with
+    BROKER_SYNC_IMAP_INCLUDE_PROVIDERS.
+    """
     from broker_sync.providers import imap as imap_mod
     from broker_sync.providers.parsers import invest_engine as ie_parser
 
@@ -130,15 +135,60 @@ def test_exclude_invest_engine_skips_ie_emails(monkeypatch: MonkeyPatch) -> None
 
     creds = imap_mod.ImapCreds(host="h", user="u", password="p", directory="d")
 
-    monkeypatch.setenv("BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS", "invest-engine")
-    out_excluded = imap_mod.fetch_activities(creds)
-    # IE skipped → only the schwab activity is emitted
-    assert len(out_excluded) == 1
-
+    # Default (no env): IE skipped, Schwab parsed.
     monkeypatch.delenv("BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS", raising=False)
+    monkeypatch.delenv("BROKER_SYNC_IMAP_INCLUDE_PROVIDERS", raising=False)
     out_default = imap_mod.fetch_activities(creds)
-    # Both providers fire when env unset
-    assert len(out_default) == 2
+    assert len(out_default) == 1, "IE must be skipped by default; only Schwab emitted"
+
+
+def test_invest_engine_opt_in_via_include_env(monkeypatch: MonkeyPatch) -> None:
+    """Setting BROKER_SYNC_IMAP_INCLUDE_PROVIDERS=invest-engine re-enables
+    IE parsing (escape hatch for the legacy IMAP path)."""
+    from broker_sync.providers import imap as imap_mod
+    from broker_sync.providers.parsers import invest_engine as ie_parser
+
+    ie_email = b"From: noreply@investengine.com\r\n\r\nirrelevant\r\n"
+    schwab_email = b"From: donotreply@schwab.com\r\n\r\n\r\n"
+    monkeypatch.setattr(imap_mod, "_fetch_all", lambda _: [ie_email, schwab_email])
+    monkeypatch.setattr(ie_parser, "parse_invest_engine_email", lambda raw: [object()])
+    monkeypatch.setattr(imap_mod, "parse_schwab_email", lambda html: [object()])
+
+    creds = imap_mod.ImapCreds(host="h", user="u", password="p", directory="d")
+
+    monkeypatch.setenv("BROKER_SYNC_IMAP_INCLUDE_PROVIDERS", "invest-engine")
+    monkeypatch.delenv("BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS", raising=False)
+    out = imap_mod.fetch_activities(creds)
+    assert len(out) == 2, "INCLUDE=invest-engine must re-enable IE parsing"
+
+
+def test_exclude_schwab_still_works(monkeypatch: MonkeyPatch) -> None:
+    """EXCLUDE env still works for other providers (forward-compat)."""
+    from broker_sync.providers import imap as imap_mod
+    from broker_sync.providers.parsers import invest_engine as ie_parser
+
+    schwab_email = b"From: donotreply@schwab.com\r\n\r\n\r\n"
+    monkeypatch.setattr(imap_mod, "_fetch_all", lambda _: [schwab_email])
+    monkeypatch.setattr(ie_parser, "parse_invest_engine_email", lambda raw: [object()])
+    monkeypatch.setattr(imap_mod, "parse_schwab_email", lambda html: [object()])
+
+    creds = imap_mod.ImapCreds(host="h", user="u", password="p", directory="d")
+
+    monkeypatch.setenv("BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS", "schwab")
+    monkeypatch.delenv("BROKER_SYNC_IMAP_INCLUDE_PROVIDERS", raising=False)
+    out = imap_mod.fetch_activities(creds)
+    assert len(out) == 0, "Schwab must be skipped when in EXCLUDE list"
+
+
+def test_include_overrides_default_and_exclude(monkeypatch: MonkeyPatch) -> None:
+    """INCLUDE wins over both the structural default and EXCLUDE env var."""
+    from broker_sync.providers import imap as imap_mod
+
+    monkeypatch.setenv("BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS", "invest-engine,schwab")
+    monkeypatch.setenv("BROKER_SYNC_IMAP_INCLUDE_PROVIDERS", "invest-engine")
+    resolved = imap_mod._resolve_excluded_providers()
+    assert "invest-engine" not in resolved
+    assert "schwab" in resolved
 
 
 def test_schwab_subdomain_sender_matches() -> None: