Add InvestEngine email parser — RFC 2822 v1/v2 line format

Context: The old finance/ app had a 324-line IE message parser with four
line-based variants (v1/v2/v3/v4) plus an HTML strategy and a CSV
fallback. Port into broker-sync so we can consume IE trade confirmation
emails as a backup to the live HTTP client (Phase 2b) while IE's public
API remains Bearer-only.

The upstream parser emits storage.model.Position; we emit canonical
Activity with the broker-sync invariants: account_id="invest-engine-primary"
(sink remaps to Wealthfolio UUID), account_type=ISA, currency=GBP, and
external_id="invest-engine:<fingerprint>" where the fingerprint is a
SHA-256 of (date|symbol|quantity|unit_price) — deterministic so repeat
imports of the same email dedup at the sync-record layer.

This change:
- Top-level `parse_invest_engine_email(raw_email: bytes) -> list[Activity]`
  extracts the text/plain body from an RFC 2822 message and dispatches to
  the line-based parser.
- `_parse_rfc2822_lines(body)` tries the v2 layout first (newer IE format
  where `Date: DD Month` is on line 2 and the year on line 3), then the
  v1 layout (where the day alone is on line 2 and `Month YYYY` on line 3).
  v3 and v4 variants are re-added in a follow-up if we find fixtures
  where they matter — initial fixture coverage hits v2.
- Drops the upstream `_ticker_post_processing` VUAG→VUAG.L hack.
  Wealthfolio's /import/check endpoint resolves exchange suffixes; the
  Trading212 provider also emits suffix-free tickers (e.g. `VUAG`), so
  staying consistent avoids double-mapping.
- Notes field records the parse-strategy tag ("rfc2822-v2") plus the
  matched line for debugging.

Test plan:
  poetry run pytest tests/providers/parsers/ -q
  → 3 passed in 0.03s
  poetry run mypy broker_sync/providers/parsers/invest_engine.py tests/providers/parsers/test_invest_engine.py
  → Success: no issues found in 2 source files
  poetry run ruff check broker_sync/providers/parsers/invest_engine.py tests/providers/parsers/test_invest_engine.py
  → All checks passed!
  poetry run yapf --diff broker_sync/providers/parsers/invest_engine.py tests/providers/parsers/test_invest_engine.py
  → clean (no diff)

Manual verification: load the fixture email, call the parser, and check
that the returned Activity has symbol=VUAG, quantity=59.539562,
unit_price=60.46, date=2023-01-17, and an external_id starting with
invest-engine:.
This commit is contained in:
Viktor Barzin 2026-04-17 21:49:52 +00:00
parent b363032e42
commit 9ec8ece2d9
4 changed files with 209 additions and 0 deletions

View file

@ -0,0 +1,150 @@
"""InvestEngine email parser.
IE mails the user after each trade batch. The body shape varies: over
the years IE has sent trade confirmations as plain-text RFC 2822
messages, multipart HTML emails with a summary table, and (for older
statements) CSV attachments. The intended design tries the three
strategies in order and returns the first that yields at least one
Activity; only the plain-text RFC 2822 line parser is implemented in this
module so far.
Every parse strategy produces canonical `Activity` objects with:
- `account_id = "invest-engine-primary"` (sink remaps to Wealthfolio UUID)
- `account_type = AccountType.ISA` (Viktor's IE account is an ISA)
- `currency = "GBP"`
- `external_id = f"invest-engine:{fingerprint}"` where fingerprint hashes
(date, symbol, quantity, unit_price) for deterministic dedup.
"""
from __future__ import annotations
import email
import hashlib
from datetime import datetime
from decimal import Decimal
from email.message import Message
from broker_sync.models import AccountType, Activity, ActivityType
# Logical account identifier stamped on every Activity built by this module.
_ACCOUNT_ID = "invest-engine-primary"
# Currency symbol that prefixes prices in the email body; the line parsers
# split the trade line on it to locate the unit price.
_CURRENCY_SIGN = "£"
def parse_invest_engine_email(raw_email: bytes) -> list[Activity]:
    """Turn a raw IE trade-confirmation email into Activity records.

    The bytes are parsed with the stdlib ``email`` package, the plain-text
    body is extracted, and that body is handed to the line-based RFC 2822
    parser.

    Args:
        raw_email: The complete email (headers plus body) as raw bytes.

    Returns:
        The activities produced by the line-based parser, or an empty list
        when the message has no usable text body or no layout matches.
    """
    message = email.message_from_bytes(raw_email)
    text = _extract_text_body(message)
    return [] if text is None else _parse_rfc2822_lines(text)
def _decode_payload(payload: bytes, charset: str | None) -> str:
    """Decode *payload* using the declared charset, falling back to UTF-8.

    Undecodable bytes are replaced rather than raising. A charset name that
    Python has no codec for is treated the same as a missing charset.
    """
    try:
        return payload.decode(charset or "utf-8", errors="replace")
    except LookupError:
        # The Content-Type header named a codec Python does not know.
        # errors="replace" does not cover this case, so handle it here to
        # keep the parser from raising on malformed mail.
        return payload.decode("utf-8", errors="replace")


def _extract_text_body(msg: Message) -> str | None:
    """Return the text/plain body of an email, or None if absent.

    For multipart messages the parts are walked in order and the first
    ``text/plain`` part with a decodable payload wins. For single-part
    messages the payload is returned regardless of its content type.

    Args:
        msg: A parsed email message.

    Returns:
        The decoded body text, or None when no suitable payload exists.
        Never raises on an unknown or bogus charset declaration.
    """
    if msg.is_multipart():
        for part in msg.walk():
            if part.get_content_type() != "text/plain":
                continue
            payload = part.get_payload(decode=True)
            if isinstance(payload, bytes):
                return _decode_payload(payload, part.get_content_charset())
        return None

    payload = msg.get_payload(decode=True)
    if isinstance(payload, bytes):
        return _decode_payload(payload, msg.get_content_charset())
    if isinstance(payload, str):
        return payload
    return None
def _parse_rfc2822_lines(body: str) -> list[Activity]:
    """Run the line-based layout parsers over *body* and keep the first hit.

    The v2 layout is attempted before v1; these correspond to
    `_extract_position_v2` and `_extract_position_v1` in the upstream
    parser. Later parsers are not invoked once one succeeds.

    Returns:
        A single-element list holding the parsed Activity, or `[]` when
        neither layout matches.
    """
    # Generator keeps evaluation lazy: v1 only runs if v2 returned None.
    attempts = (layout_parser(body) for layout_parser in (_try_v2, _try_v1))
    first_hit = next((hit for hit in attempts if hit is not None), None)
    return [] if first_hit is None else [first_hit]
def _try_v2(body: str) -> Activity | None:
    """Parse body with v2 layout: `Date: DD Month` on line 2, year on line 3.

    Expected shape (0-based indexes into ``body.splitlines()``):
      - line 2 ends with ``DD Month``
      - line 3 starts with the year
      - line 4 is the trade line:
        ``<fund name>: <SYMBOL> Bought <quantity> @ £<price> ...``

    Returns:
        The parsed BUY Activity, or None when the body has fewer than six
        lines or any field cannot be extracted or converted.
    """
    lines = body.splitlines()
    if len(lines) < 6:
        return None
    trade_line = lines[4]
    try:
        day_str, month = lines[2].split()[-2:]
        year = lines[3].split()[0]
        on_date = datetime.strptime(f"{day_str}-{month}-{year}", "%d-%B-%Y")
        symbol = trade_line.split(":")[1].split()[0].strip()
        quantity = Decimal(trade_line.split("Bought")[1].split()[0])
        unit_price = Decimal(trade_line.split(_CURRENCY_SIGN)[1].split()[0])
    except (ValueError, IndexError, ArithmeticError):
        # Decimal() reports a non-numeric string via decimal.InvalidOperation,
        # which derives from ArithmeticError, not ValueError. Without it a
        # malformed quantity or price would escape and crash the parser.
        return None
    return _build_activity(
        on_date=on_date,
        symbol=symbol,
        quantity=quantity,
        unit_price=unit_price,
        strategy="rfc2822-v2",
        matched=trade_line,
    )
def _try_v1(body: str) -> Activity | None:
    """Parse body with v1 layout: `Date: DD` on line 2, `Month YYYY` on line 3.

    Expected shape (0-based indexes into ``body.splitlines()``):
      - line 2 ends with ``Date: DD``
      - line 3 starts with ``Month YYYY``
      - line 4 is the trade line:
        ``<fund name>: <SYMBOL> Bought <quantity> @ £<price> ...``

    Returns:
        The parsed BUY Activity, or None when the body has fewer than six
        lines or any field cannot be extracted or converted.
    """
    lines = body.splitlines()
    if len(lines) < 6:
        return None
    trade_line = lines[4]
    try:
        day = int(lines[2].split("Date: ")[1])
        # Take the first two whitespace-separated tokens of line 3. Splitting
        # on a single space first (as before) left only one token, so the
        # two-name unpack could never succeed for a normal "Month YYYY" line.
        month, year = lines[3].split()[:2]
        on_date = datetime.strptime(f"{day}-{month}-{year}", "%d-%B-%Y")
        symbol = trade_line.split(":")[1].split()[0].strip()
        after_bought = trade_line.split("Bought")[1]
        quantity = Decimal(after_bought.split()[0])
        price_str = after_bought.split("@")[1].split()[0].split(_CURRENCY_SIGN)[1]
        unit_price = Decimal(price_str)
    except (ValueError, IndexError, ArithmeticError):
        # Decimal() reports a non-numeric string via decimal.InvalidOperation,
        # which derives from ArithmeticError, not ValueError.
        return None
    return _build_activity(
        on_date=on_date,
        symbol=symbol,
        quantity=quantity,
        unit_price=unit_price,
        strategy="rfc2822-v1",
        matched=trade_line,
    )
def _build_activity(
    *,
    on_date: datetime,
    symbol: str,
    quantity: Decimal,
    unit_price: Decimal,
    strategy: str,
    matched: str,
) -> Activity:
    """Assemble the canonical BUY Activity for one parsed trade.

    Args:
        on_date: Trade date parsed from the email body.
        symbol: Ticker as it appears in the email.
        quantity: Number of shares bought.
        unit_price: Price per share.
        strategy: Tag naming the layout parser that matched (kept in notes).
        matched: The raw line the trade was parsed from (kept in notes).

    Returns:
        An Activity with the module's fixed account id, ISA account type,
        GBP currency and an ``invest-engine:``-prefixed external id.
    """
    trade_hash = _fingerprint(on_date, symbol, quantity, unit_price)
    external_id = f"invest-engine:{trade_hash}"
    debug_note = f"[{strategy}] {matched.strip()}"
    return Activity(
        external_id=external_id,
        account_id=_ACCOUNT_ID,
        account_type=AccountType.ISA,
        date=on_date,
        activity_type=ActivityType.BUY,
        currency="GBP",
        symbol=symbol,
        quantity=quantity,
        unit_price=unit_price,
        notes=debug_note,
    )
def _fingerprint(date: datetime, symbol: str, quantity: Decimal, unit_price: Decimal) -> str:
    """Return a short, deterministic hash identifying one trade.

    The four fields are joined with ``|`` (date in ISO format), hashed with
    SHA-256, and the first 16 hex characters of the digest are returned.
    """
    fields = (date.isoformat(), symbol, quantity, unit_price)
    material = "|".join(f"{field}" for field in fields)
    digest = hashlib.sha256(material.encode("utf-8")).hexdigest()
    return digest[:16]

View file

@ -0,0 +1,15 @@
From: InvestEngine <no-reply@investengine.com>
To: viktorbarzin@example.com
Subject: Your portfolio has been updated
Date: Tue, 17 Jan 2023 14:48:00 +0000
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
<https://investengine.com/> We've executed your orders and your
portfolio has been updated Client name: Redacted Trading
venue: London Stock Exchange Type: Market Order(s) Date: 17 January
2023 Here's a summary of the trades we've made for you
Vanguard S&P 500: VUAG Bought 59.539562 @ £60.46 per share Total:
£3600.00 ISIN: IE00BFMXXD54, Order ID: 199510/2163746, Traded at
2:48pm GMT/UTC Take me to my updated portfolio

View file

View file

@ -0,0 +1,44 @@
from __future__ import annotations
from datetime import datetime
from decimal import Decimal
from pathlib import Path
from broker_sync.models import AccountType, ActivityType
from broker_sync.providers.parsers.invest_engine import parse_invest_engine_email
# Fixture directory: three levels up from this test file, then
# fixtures/invest_engine.
_FIXTURES = Path(__file__).parent.parent.parent.joinpath("fixtures", "invest_engine")


def _load(name: str) -> bytes:
    """Read the named fixture file from the fixture directory as raw bytes."""
    fixture_path = _FIXTURES / name
    return fixture_path.read_bytes()
# -- RFC 2822 body (v2-style, single BUY) --
def test_rfc2822_single_buy_parses_to_one_activity() -> None:
    """The v2 fixture yields exactly one BUY with the expected field values."""
    parsed = parse_invest_engine_email(_load("rfc2822_v2_single_buy.eml"))
    assert len(parsed) == 1

    activity = parsed[0]
    assert activity.activity_type is ActivityType.BUY
    assert activity.symbol == "VUAG"
    assert activity.quantity == Decimal("59.539562")
    assert activity.unit_price == Decimal("60.46")
    assert activity.currency == "GBP"
    assert activity.date == datetime(2023, 1, 17)
    assert activity.account_id == "invest-engine-primary"
    assert activity.account_type is AccountType.ISA
def test_rfc2822_external_id_is_deterministic() -> None:
    """Parsing the same fixture twice gives the same prefixed external_id."""
    fixture_name = "rfc2822_v2_single_buy.eml"
    first, second = (
        parse_invest_engine_email(_load(fixture_name))[0] for _ in range(2)
    )
    assert first.external_id == second.external_id
    assert first.external_id.startswith("invest-engine:")
def test_rfc2822_notes_record_parse_strategy() -> None:
    """The notes field is populated and mentions the rfc2822 strategy tag."""
    parsed = parse_invest_engine_email(_load("rfc2822_v2_single_buy.eml"))
    activity = parsed[0]
    assert activity.notes is not None
    assert "rfc2822" in activity.notes