From 87526898e6afcedac4fc8cd18e283467dccd1e4a Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Fri, 17 Apr 2026 22:02:48 +0000 Subject: [PATCH] =?UTF-8?q?Pin=20InvestEngine=20parser=20failure=20modes?= =?UTF-8?q?=20=E2=80=94=20empty-on-junk=20+=20partial-match?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Context: The port's graceful-failure contract was implicit in the way each strategy returns None/[] on malformed input, but without tests it was an accidental property that could regress silently. Codify it. Two invariants, each backed by a fixture: 1. Junk email → empty list, never raise. `unparseable.eml` is a pure-marketing IE newsletter with no order data. All three strategies try and fail; parse_invest_engine_email returns []. No exception leaks. 2. Partial HTML email → intact orders only. `html_partial_match.eml` has two nested summary tables: one with a valid VUAG order, one that is missing both the ticker and "Bought N @ £P" rows (simulates IE dropping content mid-render). The parser returns just the VUAG order. No implementation change needed — the behaviour existed as a side effect of _try_html_summary_table returning None on missing fields. These tests lock it down so future refactors can't quietly break it. Test plan: poetry run pytest tests/providers/parsers/ -q → 8 passed in 0.19s poetry run mypy broker_sync/providers/parsers/invest_engine.py tests/providers/parsers/test_invest_engine.py → clean poetry run ruff check broker_sync/providers/parsers/invest_engine.py tests/providers/parsers/test_invest_engine.py → All checks passed! poetry run yapf --diff → clean (no diff) Manual verification: - Load unparseable.eml → parse returns []. - Load html_partial_match.eml → parse returns exactly 1 activity (VUAG). --- .../invest_engine/html_partial_match.eml | 40 +++++++++++++++++++ tests/fixtures/invest_engine/unparseable.eml | 15 +++++++ tests/providers/parsers/test_invest_engine.py | 17 ++++++++ 3 files changed, 72 insertions(+) create mode 100644 tests/fixtures/invest_engine/html_partial_match.eml create mode 100644 tests/fixtures/invest_engine/unparseable.eml diff --git a/tests/fixtures/invest_engine/html_partial_match.eml b/tests/fixtures/invest_engine/html_partial_match.eml new file mode 100644 index 0000000..fc41aa1 --- /dev/null +++ b/tests/fixtures/invest_engine/html_partial_match.eml @@ -0,0 +1,40 @@ +From: InvestEngine +To: viktorbarzin@example.com +Subject: Your portfolio has been updated +Date: Wed, 15 Apr 2026 11:00:00 +0000 +MIME-Version: 1.0 +Content-Type: multipart/alternative; boundary="----=_Part_PM" + +------=_Part_PM +Content-Type: text/plain; charset=UTF-8 + +(HTML-only view — your client does not render HTML emails.) + +------=_Part_PM +Content-Type: text/html; charset=UTF-8 + + +
Logo
+ + + + + + + + +
Date: 15 April 2026
+ + + + +
Vanguard S&P 500: VUAG
Bought 3.0 @ £61.25 per share
Total: £183.75
+
+ + + +
Some broken order with no ticker and no bought line
(Malformed — IE dropped a row mid-render)
+
+ + +------=_Part_PM-- diff --git a/tests/fixtures/invest_engine/unparseable.eml b/tests/fixtures/invest_engine/unparseable.eml new file mode 100644 index 0000000..933f99a --- /dev/null +++ b/tests/fixtures/invest_engine/unparseable.eml @@ -0,0 +1,15 @@ +From: InvestEngine +To: viktorbarzin@example.com +Subject: InvestEngine newsletter +Date: Thu, 10 Apr 2025 12:00:00 +0000 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 + +Hi Viktor, + +This is a newsletter, not a trade confirmation. There is no structured +order data here — just marketing copy and a promo for a new feature we +are rolling out. Thanks for being a customer. + +Cheers, +The InvestEngine team diff --git a/tests/providers/parsers/test_invest_engine.py b/tests/providers/parsers/test_invest_engine.py index 8ef81d3..9c30889 100644 --- a/tests/providers/parsers/test_invest_engine.py +++ b/tests/providers/parsers/test_invest_engine.py @@ -89,3 +89,20 @@ def test_csv_attachment_parses_all_rows() -> None: assert a.account_type is AccountType.ISA assert a.notes is not None assert "csv" in a.notes + + +# -- graceful failure modes -- + + +def test_unparseable_email_returns_empty_list() -> None: + assert parse_invest_engine_email(_load("unparseable.eml")) == [] + + +def test_html_partial_match_returns_only_parseable_orders() -> None: + activities = parse_invest_engine_email(_load("html_partial_match.eml")) + assert len(activities) == 1 + a = activities[0] + assert a.symbol == "VUAG" + assert a.quantity == Decimal("3.0") + assert a.unit_price == Decimal("61.25") + assert a.date == datetime(2026, 4, 15)