imap: skip InvestEngine by default; opt back in via INCLUDE env
Post-mortem 2026-05-27: 39 IMAP-sourced IE BUYs and their cash-flow DEPOSITs were re-inserted into Wealthfolio at 09:22:18 UTC — exactly the rows that the £252k dedup had removed the previous day. The cron's BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS=invest-engine env var did its job (the cron logged ie_skipped=53), but some other entry point — kubectl run, poetry run on the devvm, or a sibling agent session — ran the IMAP ingest WITHOUT that env var set. The opt-out env var was a foot-gun: the safety only held if every caller remembered to set it. This change makes the IE-via-IMAP safety STRUCTURAL: `invest-engine` is in the default exclude set inside _resolve_excluded_providers(). Any code path now skips IE unless the caller explicitly sets `BROKER_SYNC_IMAP_INCLUDE_PROVIDERS=invest-engine`. The `BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS` env var still works (additively, on top of the default exclude set) for forward-compat in case Schwab etc. ever need similar treatment. INCLUDE wins over both the default exclude set and the EXCLUDE env var. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
3427f5c9e1
commit
0d23487608
2 changed files with 94 additions and 17 deletions
|
|
@ -151,14 +151,41 @@ def _fetch_all(creds: ImapCreds) -> Iterator[bytes]:
|
||||||
yield raw
|
yield raw
|
||||||
|
|
||||||
|
|
||||||
def fetch_activities(creds: ImapCreds) -> list[Activity]:
|
def _resolve_excluded_providers() -> set[str]:
|
||||||
out: list[Activity] = []
|
"""Return the set of providers the IMAP fetcher must skip.
|
||||||
ie_parsed = schwab_parsed = ie_skipped = skipped = 0
|
|
||||||
exclude = {
|
Default-exclude list is structural — `invest-engine` is ALWAYS skipped
|
||||||
p.strip().lower()
|
unless explicitly opted back in via `BROKER_SYNC_IMAP_INCLUDE_PROVIDERS`.
|
||||||
|
This protects against accidental re-ingestion via any code path that
|
||||||
|
doesn't set the cron's env (e.g. `kubectl run --rm`, devvm `poetry run`,
|
||||||
|
a sibling agent session). See post-mortem 2026-05-27 — the IMAP path
|
||||||
|
re-inserted 39 IE BUYs that had been deduped the previous day, because
|
||||||
|
the safety lived only on the cronjob spec.
|
||||||
|
|
||||||
|
Additional providers can be excluded via
|
||||||
|
`BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS`. `INCLUDE` always wins over
|
||||||
|
`EXCLUDE` and the default skip-list.
|
||||||
|
"""
|
||||||
|
_DEFAULT_EXCLUDED = {"invest-engine", "invest_engine"}
|
||||||
|
extra = {
|
||||||
|
p.strip().lower().replace("_", "-")
|
||||||
for p in os.environ.get("BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS", "").split(",")
|
for p in os.environ.get("BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS", "").split(",")
|
||||||
if p.strip()
|
if p.strip()
|
||||||
}
|
}
|
||||||
|
include = {
|
||||||
|
p.strip().lower().replace("_", "-")
|
||||||
|
for p in os.environ.get("BROKER_SYNC_IMAP_INCLUDE_PROVIDERS", "").split(",")
|
||||||
|
if p.strip()
|
||||||
|
}
|
||||||
|
# Canonicalise the default set under the same key normalisation.
|
||||||
|
canonical = {p.replace("_", "-") for p in _DEFAULT_EXCLUDED}
|
||||||
|
return (canonical | extra) - include
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_activities(creds: ImapCreds) -> list[Activity]:
|
||||||
|
out: list[Activity] = []
|
||||||
|
ie_parsed = schwab_parsed = ie_skipped = skipped = 0
|
||||||
|
exclude = _resolve_excluded_providers()
|
||||||
for raw in _fetch_all(creds):
|
for raw in _fetch_all(creds):
|
||||||
try:
|
try:
|
||||||
msg = email.message_from_bytes(raw)
|
msg = email.message_from_bytes(raw)
|
||||||
|
|
@ -167,7 +194,7 @@ def fetch_activities(creds: ImapCreds) -> list[Activity]:
|
||||||
continue
|
continue
|
||||||
sender = _extract_sender(msg)
|
sender = _extract_sender(msg)
|
||||||
if sender in _IE_SENDERS or sender.endswith("@investengine.com"):
|
if sender in _IE_SENDERS or sender.endswith("@investengine.com"):
|
||||||
if "invest-engine" in exclude or "invest_engine" in exclude:
|
if "invest-engine" in exclude:
|
||||||
ie_skipped += 1
|
ie_skipped += 1
|
||||||
continue
|
continue
|
||||||
out.extend(ie_parser.parse_invest_engine_email(raw))
|
out.extend(ie_parser.parse_invest_engine_email(raw))
|
||||||
|
|
|
||||||
|
|
@ -105,10 +105,15 @@ def test_non_ie_activities_passed_through_unchanged() -> None:
|
||||||
assert routed[0].account_type is AccountType.GIA
|
assert routed[0].account_type is AccountType.GIA
|
||||||
|
|
||||||
|
|
||||||
def test_exclude_invest_engine_skips_ie_emails(monkeypatch: MonkeyPatch) -> None:
|
def test_invest_engine_skipped_by_default(monkeypatch: MonkeyPatch) -> None:
|
||||||
"""BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS=invest-engine should skip IE messages
|
"""InvestEngine messages MUST be skipped by default, even with no env set.
|
||||||
so we don't duplicate IE buys already ingested via the bearer-token API path.
|
|
||||||
Schwab routing must remain unaffected."""
|
Post-mortem 2026-05-27: any code path that doesn't set the cron's env
|
||||||
|
(e.g. `kubectl run --rm` or devvm `poetry run`) was re-importing IE
|
||||||
|
BUYs through this IMAP path. The opt-out env var was a foot-gun.
|
||||||
|
Invariant now: structural default skip; opt back in only with
|
||||||
|
BROKER_SYNC_IMAP_INCLUDE_PROVIDERS.
|
||||||
|
"""
|
||||||
from broker_sync.providers import imap as imap_mod
|
from broker_sync.providers import imap as imap_mod
|
||||||
from broker_sync.providers.parsers import invest_engine as ie_parser
|
from broker_sync.providers.parsers import invest_engine as ie_parser
|
||||||
|
|
||||||
|
|
@ -130,15 +135,60 @@ def test_exclude_invest_engine_skips_ie_emails(monkeypatch: MonkeyPatch) -> None
|
||||||
|
|
||||||
creds = imap_mod.ImapCreds(host="h", user="u", password="p", directory="d")
|
creds = imap_mod.ImapCreds(host="h", user="u", password="p", directory="d")
|
||||||
|
|
||||||
monkeypatch.setenv("BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS", "invest-engine")
|
# Default (no env): IE skipped, Schwab parsed.
|
||||||
out_excluded = imap_mod.fetch_activities(creds)
|
|
||||||
# IE skipped → only the schwab activity is emitted
|
|
||||||
assert len(out_excluded) == 1
|
|
||||||
|
|
||||||
monkeypatch.delenv("BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS", raising=False)
|
monkeypatch.delenv("BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS", raising=False)
|
||||||
|
monkeypatch.delenv("BROKER_SYNC_IMAP_INCLUDE_PROVIDERS", raising=False)
|
||||||
out_default = imap_mod.fetch_activities(creds)
|
out_default = imap_mod.fetch_activities(creds)
|
||||||
# Both providers fire when env unset
|
assert len(out_default) == 1, "IE must be skipped by default; only Schwab emitted"
|
||||||
assert len(out_default) == 2
|
|
||||||
|
|
||||||
|
def test_invest_engine_opt_in_via_include_env(monkeypatch: MonkeyPatch) -> None:
|
||||||
|
"""Setting BROKER_SYNC_IMAP_INCLUDE_PROVIDERS=invest-engine re-enables
|
||||||
|
IE parsing (escape hatch for the legacy IMAP path)."""
|
||||||
|
from broker_sync.providers import imap as imap_mod
|
||||||
|
from broker_sync.providers.parsers import invest_engine as ie_parser
|
||||||
|
|
||||||
|
ie_email = b"From: noreply@investengine.com\r\n\r\nirrelevant\r\n"
|
||||||
|
schwab_email = b"From: donotreply@schwab.com\r\n\r\n<html></html>\r\n"
|
||||||
|
monkeypatch.setattr(imap_mod, "_fetch_all", lambda _: [ie_email, schwab_email])
|
||||||
|
monkeypatch.setattr(ie_parser, "parse_invest_engine_email", lambda raw: [object()])
|
||||||
|
monkeypatch.setattr(imap_mod, "parse_schwab_email", lambda html: [object()])
|
||||||
|
|
||||||
|
creds = imap_mod.ImapCreds(host="h", user="u", password="p", directory="d")
|
||||||
|
|
||||||
|
monkeypatch.setenv("BROKER_SYNC_IMAP_INCLUDE_PROVIDERS", "invest-engine")
|
||||||
|
monkeypatch.delenv("BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS", raising=False)
|
||||||
|
out = imap_mod.fetch_activities(creds)
|
||||||
|
assert len(out) == 2, "INCLUDE=invest-engine must re-enable IE parsing"
|
||||||
|
|
||||||
|
|
||||||
|
def test_exclude_schwab_still_works(monkeypatch: MonkeyPatch) -> None:
|
||||||
|
"""EXCLUDE env still works for other providers (forward-compat)."""
|
||||||
|
from broker_sync.providers import imap as imap_mod
|
||||||
|
from broker_sync.providers.parsers import invest_engine as ie_parser
|
||||||
|
|
||||||
|
schwab_email = b"From: donotreply@schwab.com\r\n\r\n<html></html>\r\n"
|
||||||
|
monkeypatch.setattr(imap_mod, "_fetch_all", lambda _: [schwab_email])
|
||||||
|
monkeypatch.setattr(ie_parser, "parse_invest_engine_email", lambda raw: [object()])
|
||||||
|
monkeypatch.setattr(imap_mod, "parse_schwab_email", lambda html: [object()])
|
||||||
|
|
||||||
|
creds = imap_mod.ImapCreds(host="h", user="u", password="p", directory="d")
|
||||||
|
|
||||||
|
monkeypatch.setenv("BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS", "schwab")
|
||||||
|
monkeypatch.delenv("BROKER_SYNC_IMAP_INCLUDE_PROVIDERS", raising=False)
|
||||||
|
out = imap_mod.fetch_activities(creds)
|
||||||
|
assert len(out) == 0, "Schwab must be skipped when in EXCLUDE list"
|
||||||
|
|
||||||
|
|
||||||
|
def test_include_overrides_default_and_exclude(monkeypatch: MonkeyPatch) -> None:
|
||||||
|
"""INCLUDE wins over both the structural default and EXCLUDE env var."""
|
||||||
|
from broker_sync.providers import imap as imap_mod
|
||||||
|
|
||||||
|
monkeypatch.setenv("BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS", "invest-engine,schwab")
|
||||||
|
monkeypatch.setenv("BROKER_SYNC_IMAP_INCLUDE_PROVIDERS", "invest-engine")
|
||||||
|
resolved = imap_mod._resolve_excluded_providers()
|
||||||
|
assert "invest-engine" not in resolved
|
||||||
|
assert "schwab" in resolved
|
||||||
|
|
||||||
|
|
||||||
def test_schwab_subdomain_sender_matches() -> None:
|
def test_schwab_subdomain_sender_matches() -> None:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue