Merge ie-email-parser: HTML + CSV fallbacks + failure-mode tests

# Conflicts:
#	broker_sync/providers/parsers/invest_engine.py
#	tests/providers/parsers/test_invest_engine.py
This commit is contained in:
Viktor Barzin 2026-04-17 22:06:29 +00:00
commit 1aa60ce348
6 changed files with 390 additions and 15 deletions

View file

@ -0,0 +1,22 @@
From: InvestEngine <no-reply@investengine.com>
To: viktorbarzin@example.com
Subject: Your InvestEngine statement
Date: Mon, 07 Apr 2025 09:00:00 +0000
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="----=_MIXED_1"
------=_MIXED_1
Content-Type: text/plain; charset=UTF-8
Your monthly statement is attached as a CSV.
------=_MIXED_1
Content-Type: text/csv; charset=UTF-8; name="statement.csv"
Content-Disposition: attachment; filename="statement.csv"
ticker,unit_price,quantity,date,currency
VUAG,63.21,12.5,2025-04-02,GBP
SWDA,86.40,4.75,2025-04-03,GBP
VUSA,90.10,1.0,2025-04-04,GBP
------=_MIXED_1--

View file

@ -0,0 +1,40 @@
From: InvestEngine <no-reply@investengine.com>
To: viktorbarzin@example.com
Subject: Your portfolio has been updated
Date: Wed, 15 Apr 2026 11:00:00 +0000
MIME-Version: 1.0
Content-Type: multipart/alternative; boundary="----=_Part_PM"
------=_Part_PM
Content-Type: text/plain; charset=UTF-8
(HTML-only view — your client does not render HTML emails.)
------=_Part_PM
Content-Type: text/html; charset=UTF-8
<html><body>
<table><tr><td>Logo</td></tr></table>
<table>
<tr><td> Date: 15 April 2026 </td></tr>
<tr>
<td>
<table>
<tr><td>Vanguard S&amp;P 500: VUAG</td></tr>
<tr><td>Bought 3.0 @ &pound;61.25 per share</td></tr>
<tr><td>Total: &pound;183.75</td></tr>
</table>
</td>
</tr>
<tr>
<td>
<table>
<tr><td>Some broken order with no ticker and no bought line</td></tr>
<tr><td>(Malformed — IE dropped a row mid-render)</td></tr>
</table>
</td>
</tr>
</table>
</body></html>
------=_Part_PM--

View file

@ -0,0 +1,55 @@
From: InvestEngine <no-reply@investengine.com>
To: viktorbarzin@example.com
Subject: Your portfolio has been updated
Date: Wed, 01 Apr 2026 09:15:00 +0000
MIME-Version: 1.0
Content-Type: multipart/alternative; boundary="----=_Part_1"
------=_Part_1
Content-Type: text/plain; charset=UTF-8
(HTML-only view — your client does not render HTML emails.)
------=_Part_1
Content-Type: text/html; charset=UTF-8
<html><head><title>InvestEngine</title></head><body>
<table><tr><td>Header logo</td></tr></table>
<table>
<tr><td>Client name: Redacted</td></tr>
<tr><td>Trading venue: London Stock Exchange</td></tr>
<tr><td>Type: Market Order(s)</td></tr>
<tr><td>Here's a summary of the trades we've made for you</td></tr>
<tr>
<td>a</td><td>b</td><td>c</td><td>d</td>
<td> Date: 01 April 2026 </td>
</tr>
<tr><td>filler</td></tr>
<tr><td>filler</td></tr>
<tr><td>filler</td></tr>
<tr><td>filler</td></tr>
<tr><td>filler</td></tr>
<tr>
<td>
<table>
<tr><td>Vanguard S&amp;P 500: VUAG</td></tr>
<tr><td>Bought 10.5 @ &pound;62.10 per share</td></tr>
<tr><td>Total: &pound;652.05</td></tr>
<tr><td>ISIN: IE00BFMXXD54, Order ID: 300000/4000001, Traded at 9:05am GMT</td></tr>
</table>
</td>
</tr>
<tr>
<td>
<table>
<tr><td>iShares Core MSCI World: SWDA</td></tr>
<tr><td>Bought 2.25 @ &pound;85.40 per share</td></tr>
<tr><td>Total: &pound;192.15</td></tr>
<tr><td>ISIN: IE00B4L5Y983, Order ID: 300000/4000002, Traded at 9:06am GMT</td></tr>
</table>
</td>
</tr>
</table>
</body></html>
------=_Part_1--

View file

@ -0,0 +1,15 @@
From: InvestEngine <no-reply@investengine.com>
To: viktorbarzin@example.com
Subject: InvestEngine newsletter
Date: Thu, 10 Apr 2025 12:00:00 +0000
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Hi Viktor,
This is a newsletter, not a trade confirmation. There is no structured
order data here — just marketing copy and a promo for a new feature we
are rolling out. Thanks for being a customer.
Cheers,
The InvestEngine team

View file

@ -42,3 +42,67 @@ def test_rfc2822_notes_record_parse_strategy() -> None:
a = parse_invest_engine_email(_load("rfc2822_v2_single_buy.eml"))[0]
assert a.notes is not None
assert "rfc2822" in a.notes
# -- HTML table body (multipart/alternative, two orders) --
def test_html_body_parses_both_orders() -> None:
    """Check that the two-order HTML fixture yields two fully populated activities."""
    parsed = parse_invest_engine_email(_load("html_two_orders.eml"))
    assert len(parsed) == 2
    first, second = parsed

    # First order: trade fields plus the account metadata attached to it.
    assert first.symbol == "VUAG"
    assert first.quantity == Decimal("10.5")
    assert first.unit_price == Decimal("62.10")
    assert first.date == datetime(2026, 4, 1)
    assert first.account_id == "invest-engine-primary"
    assert first.account_type is AccountType.ISA
    assert first.activity_type is ActivityType.BUY

    # Second order: only the trade fields are checked.
    assert second.symbol == "SWDA"
    assert second.quantity == Decimal("2.25")
    assert second.unit_price == Decimal("85.40")
    assert second.date == datetime(2026, 4, 1)
def test_html_notes_record_html_strategy() -> None:
    """Check that the first HTML-parsed activity mentions "html" in its notes."""
    activities = parse_invest_engine_email(_load("html_two_orders.eml"))
    first = activities[0]
    assert first.notes is not None
    assert "html" in first.notes
# -- CSV attachment body --
def test_csv_attachment_parses_all_rows() -> None:
    """Check that each row of the CSV attachment fixture becomes a BUY activity."""
    parsed = parse_invest_engine_email(_load("csv_attachment.eml"))
    assert len(parsed) == 3

    # Index by ticker so the checks do not depend on row order.
    by_symbol = {}
    for activity in parsed:
        by_symbol[activity.symbol] = activity

    vuag = by_symbol["VUAG"]
    assert vuag.quantity == Decimal("12.5")
    assert vuag.unit_price == Decimal("63.21")
    assert vuag.date == datetime(2025, 4, 2)
    assert by_symbol["SWDA"].quantity == Decimal("4.75")
    assert by_symbol["VUSA"].date == datetime(2025, 4, 4)

    # Fields expected to be the same on every parsed row.
    for activity in parsed:
        assert activity.activity_type is ActivityType.BUY
        assert activity.currency == "GBP"
        assert activity.account_id == "invest-engine-primary"
        assert activity.account_type is AccountType.ISA
        assert activity.notes is not None
        assert "csv" in activity.notes
# -- graceful failure modes --
def test_unparseable_email_returns_empty_list() -> None:
    """Check that the unparseable fixture produces an empty list."""
    result = parse_invest_engine_email(_load("unparseable.eml"))
    assert result == []
def test_html_partial_match_returns_only_parseable_orders() -> None:
    """Check that the partial-match HTML fixture yields only its one valid order."""
    parsed = parse_invest_engine_email(_load("html_partial_match.eml"))
    assert len(parsed) == 1

    only = parsed[0]
    assert only.symbol == "VUAG"
    assert only.quantity == Decimal("3.0")
    assert only.unit_price == Decimal("61.25")
    assert only.date == datetime(2026, 4, 15)