From 9ec8ece2d905a604f07616388b81335bee585581 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Fri, 17 Apr 2026 21:49:52 +0000 Subject: [PATCH] =?UTF-8?q?Add=20InvestEngine=20email=20parser=20=E2=80=94?= =?UTF-8?q?=20RFC=202822=20v1/v2=20line=20format?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Context: The old finance/ app had a 324-line IE message parser with four line-based variants (v1/v2/v3/v4) plus an HTML strategy and a CSV fallback. Port into broker-sync so we can consume IE trade confirmation emails as a backup to the live HTTP client (Phase 2b) while IE's public API remains Bearer-only. The upstream parser emits storage.model.Position; we emit canonical Activity with the broker-sync invariants: account_id="invest-engine-primary" (sink remaps to Wealthfolio UUID), account_type=ISA, currency=GBP, and external_id="invest-engine:{fingerprint}" where the fingerprint is the first 16 hex digits of a SHA-256 of (date|symbol|quantity|unit_price) — deterministic so repeat imports of the same email dedup at the sync-record layer. This change: - Top-level `parse_invest_engine_email(raw_email: bytes) -> list[Activity]` extracts the text/plain body from an RFC 2822 message and dispatches to the line-based parser. - `_parse_rfc2822_lines(body)` tries the v2 layout first (newer IE format where `Date: DD Month` is on line 2 and the year on line 3), then the v1 layout (where the day alone is on line 2 and `Month YYYY` on line 3). v3 and v4 variants will be re-added in a follow-up if we find fixtures where they matter — initial fixture coverage hits v2. - Drops the upstream `_ticker_post_processing` VUAG→VUAG.L hack. Wealthfolio's /import/check endpoint resolves exchange suffixes; the Trading212 provider also emits suffix-free tickers (e.g. `VUAG`), so staying consistent avoids double-mapping. - Notes field records the parse-strategy tag ("rfc2822-v2") plus the matched line for debugging. 
Test plan: poetry run pytest tests/providers/parsers/ -q → 3 passed in 0.03s poetry run mypy broker_sync/providers/parsers/invest_engine.py tests/providers/parsers/test_invest_engine.py → Success: no issues found in 2 source files poetry run ruff check broker_sync/providers/parsers/invest_engine.py tests/providers/parsers/test_invest_engine.py → All checks passed! poetry run yapf --diff broker_sync/providers/parsers/invest_engine.py tests/providers/parsers/test_invest_engine.py → clean (no diff) Manual verification: load the fixture email, call the parser, inspect the returned Activity has symbol=VUAG, quantity=59.539562, unit_price=60.46, date=2023-01-17, external_id starts with invest-engine:. --- .../providers/parsers/invest_engine.py | 150 ++++++++++++++++++ .../invest_engine/rfc2822_v2_single_buy.eml | 15 ++ tests/providers/parsers/__init__.py | 0 tests/providers/parsers/test_invest_engine.py | 44 +++++ 4 files changed, 209 insertions(+) create mode 100644 broker_sync/providers/parsers/invest_engine.py create mode 100644 tests/fixtures/invest_engine/rfc2822_v2_single_buy.eml create mode 100644 tests/providers/parsers/__init__.py create mode 100644 tests/providers/parsers/test_invest_engine.py diff --git a/broker_sync/providers/parsers/invest_engine.py b/broker_sync/providers/parsers/invest_engine.py new file mode 100644 index 0000000..6750d8c --- /dev/null +++ b/broker_sync/providers/parsers/invest_engine.py @@ -0,0 +1,150 @@ +"""InvestEngine email parser. + +IE mails the user after each trade batch. The body shape varies — over +the years IE has sent trade confirmations as plain-text RFC 2822 +messages, multipart HTML emails with a summary table, and (for older +statements) CSV attachments. Only the plain-text RFC 2822 strategy +(v1/v2 line layouts) is implemented in this module so far. 
_ACCOUNT_ID = "invest-engine-primary"
_CURRENCY_SIGN = "£"

# Exceptions that mean "the body does not match this layout".
# ArithmeticError is included because Decimal() signals non-numeric text with
# decimal.InvalidOperation, which derives from ArithmeticError, not ValueError.
_LAYOUT_MISMATCH = (ValueError, IndexError, ArithmeticError)


def parse_invest_engine_email(raw_email: bytes) -> list[Activity]:
    """Parse an IE trade confirmation email into Activity records.

    The text/plain body is extracted from the RFC 2822 message and handed to
    the line-based parser (v2 layout first, then v1).

    Returns an empty list when there is no text body or no layout matches.
    A body with non-numeric amounts, an unparseable date, or an unknown
    charset label yields `[]` instead of an exception.
    """
    msg = email.message_from_bytes(raw_email)
    body = _extract_text_body(msg)
    if body is None:
        return []
    return _parse_rfc2822_lines(body)


def _decode_payload(payload: bytes, charset: str | None) -> str:
    """Decode *payload* with *charset*, falling back to UTF-8.

    Undecodable bytes are replaced rather than raising. A charset label for
    which Python has no codec makes `bytes.decode` raise LookupError, so it is
    handled like a missing charset.
    """
    try:
        return payload.decode(charset or "utf-8", errors="replace")
    except LookupError:
        return payload.decode("utf-8", errors="replace")


def _extract_text_body(msg: Message) -> str | None:
    """Return the text/plain body of an email, or None if absent.

    For multipart messages the first text/plain part with a bytes payload
    wins; for single-part messages the payload is decoded as-is.
    """
    if msg.is_multipart():
        for part in msg.walk():
            if part.get_content_type() != "text/plain":
                continue
            payload = part.get_payload(decode=True)
            if isinstance(payload, bytes):
                return _decode_payload(payload, part.get_content_charset())
        return None
    payload = msg.get_payload(decode=True)
    if isinstance(payload, bytes):
        return _decode_payload(payload, msg.get_content_charset())
    if isinstance(payload, str):
        return payload
    return None


def _parse_rfc2822_lines(body: str) -> list[Activity]:
    """Try each line-based body format (v2, then v1) and return matches.

    Corresponds to `_extract_position_v1` and `_extract_position_v2` in
    the upstream parser. Returns a one-element list on success, `[]`
    otherwise.
    """
    for parser in (_try_v2, _try_v1):
        result = parser(body)
        if result is not None:
            return [result]
    return []


def _try_v2(body: str) -> Activity | None:
    """Parse body with v2 layout: `Date: DD Month` on line 2, year on line 3.

    Line numbers are zero-based indexes into `body.splitlines()`; the trade
    itself (`<name>: SYMBOL Bought QTY @ £PRICE ...`) is read from line 4.
    Returns None when the body is shorter than six lines or any field fails
    to parse.
    """
    lines = body.splitlines()
    if len(lines) < 6:
        return None
    try:
        # The last two tokens of line 2 are the day and the month name.
        day_str, month = lines[2].split()[-2:]
        year = lines[3].split()[0]
        # NOTE: %B matches the full month name of the current locale.
        on_date = datetime.strptime(f"{day_str}-{month}-{year}", "%d-%B-%Y")
        trade_line = lines[4]
        symbol = trade_line.split(":")[1].split()[0].strip()
        unit_price = Decimal(trade_line.split(_CURRENCY_SIGN)[1].split()[0])
        quantity = Decimal(trade_line.split("Bought")[1].split()[0])
    except _LAYOUT_MISMATCH:
        return None
    return _build_activity(
        on_date=on_date,
        symbol=symbol,
        quantity=quantity,
        unit_price=unit_price,
        strategy="rfc2822-v2",
        matched=lines[4],
    )


def _try_v1(body: str) -> Activity | None:
    """Parse body with v1 layout: `Date: DD` on line 2, `Month YYYY` on line 3.

    Line numbers are zero-based indexes into `body.splitlines()`; the trade
    is read from line 4. Returns None when the body is shorter than six lines
    or any field fails to parse.
    """
    lines = body.splitlines()
    if len(lines) < 6:
        return None
    try:
        day = int(lines[2].split("Date: ")[1])
        # Month and year are the first two whitespace-separated tokens.
        # Taking only the first space-delimited chunk would leave a single
        # token and make this unpacking fail for every input.
        month, year = lines[3].split()[:2]
        on_date = datetime.strptime(f"{day}-{month}-{year}", "%d-%B-%Y")
        trade_line = lines[4]
        symbol = trade_line.split(":")[1].split()[0].strip()
        after_bought = trade_line.split("Bought")[1]
        quantity = Decimal(after_bought.split()[0])
        price_str = after_bought.split("@")[1].split()[0].split(_CURRENCY_SIGN)[1]
        unit_price = Decimal(price_str)
    except _LAYOUT_MISMATCH:
        return None
    return _build_activity(
        on_date=on_date,
        symbol=symbol,
        quantity=quantity,
        unit_price=unit_price,
        strategy="rfc2822-v1",
        matched=lines[4],
    )


def _build_activity(
    *,
    on_date: datetime,
    symbol: str,
    quantity: Decimal,
    unit_price: Decimal,
    strategy: str,
    matched: str,
) -> Activity:
    """Assemble a BUY Activity for the IE ISA account in GBP.

    `strategy` and the stripped `matched` line are stored in `notes` so the
    parse path can be traced later; `external_id` embeds the fingerprint of
    the trade fields.
    """
    fingerprint = _fingerprint(on_date, symbol, quantity, unit_price)
    return Activity(
        external_id=f"invest-engine:{fingerprint}",
        account_id=_ACCOUNT_ID,
        account_type=AccountType.ISA,
        date=on_date,
        activity_type=ActivityType.BUY,
        currency="GBP",
        symbol=symbol,
        quantity=quantity,
        unit_price=unit_price,
        notes=f"[{strategy}] {matched.strip()}",
    )


def _fingerprint(date: datetime, symbol: str, quantity: Decimal, unit_price: Decimal) -> str:
    """Return the first 16 hex characters of a SHA-256 over the trade fields.

    The hashed key is `isoformat|symbol|quantity|unit_price`. The Decimals
    are rendered with `str()`, so `1.5` and `1.50` hash differently.
    """
    key = f"{date.isoformat()}|{symbol}|{quantity}|{unit_price}"
    return hashlib.sha256(key.encode("utf-8")).hexdigest()[:16]
_FIXTURES = Path(__file__).parent.parent.parent / "fixtures" / "invest_engine"


def _load(name: str) -> bytes:
    """Read the named fixture email from the fixtures directory as bytes."""
    fixture_path = _FIXTURES / name
    return fixture_path.read_bytes()


# -- RFC 2822 body (v2-style, single BUY) --


def test_rfc2822_single_buy_parses_to_one_activity() -> None:
    """The v2 fixture yields exactly one BUY with every field populated."""
    parsed = parse_invest_engine_email(_load("rfc2822_v2_single_buy.eml"))
    assert len(parsed) == 1
    activity = parsed[0]
    assert activity.activity_type is ActivityType.BUY
    assert activity.symbol == "VUAG"
    assert activity.quantity == Decimal("59.539562")
    assert activity.unit_price == Decimal("60.46")
    assert activity.currency == "GBP"
    assert activity.date == datetime(2023, 1, 17)
    assert activity.account_id == "invest-engine-primary"
    assert activity.account_type is AccountType.ISA


def test_rfc2822_external_id_is_deterministic() -> None:
    """Parsing the same email twice produces the same prefixed external_id."""
    first = parse_invest_engine_email(_load("rfc2822_v2_single_buy.eml"))[0]
    second = parse_invest_engine_email(_load("rfc2822_v2_single_buy.eml"))[0]
    assert first.external_id == second.external_id
    assert first.external_id.startswith("invest-engine:")


def test_rfc2822_notes_record_parse_strategy() -> None:
    """The notes field carries the parse-strategy tag."""
    activity = parse_invest_engine_email(_load("rfc2822_v2_single_buy.eml"))[0]
    assert activity.notes is not None
    assert "rfc2822" in activity.notes