Add InvestEngine email parser — RFC 2822 v1/v2 line format
Context: The old finance/ app had a 324-line IE message parser with four
line-based variants (v1/v2/v3/v4) plus an HTML strategy and a CSV
fallback. Port into broker-sync so we can consume IE trade confirmation
emails as a backup to the live HTTP client (Phase 2b) while IE's public
API remains Bearer-only.
The upstream parser emits storage.model.Position; we emit canonical
Activity with the broker-sync invariants: account_id="invest-engine-primary"
(sink remaps to Wealthfolio UUID), account_type=ISA, currency=GBP, and
external_id="invest-engine:<fingerprint>" where the fingerprint is a
SHA-256 of (date|symbol|quantity|unit_price) — deterministic so repeat
imports of the same email dedup at the sync-record layer.
This change:
- Top-level `parse_invest_engine_email(raw_email: bytes) -> list[Activity]`
extracts the text/plain body from an RFC 2822 message and dispatches to
the line-based parser.
- `_parse_rfc2822_lines(body)` tries the v2 layout first (newer IE format
where `Date: DD Month` is on line 2 and the year on line 3), then the
v1 layout (where the day alone is on line 2 and `Month YYYY` on line 3).
v3 and v4 variants are re-added in a follow-up if we find fixtures
where they matter — initial fixture coverage hits v2.
- Drops the upstream `_ticker_post_processing` VUAG→VUAG.L hack.
Wealthfolio's /import/check endpoint resolves exchange suffixes; the
Trading212 provider also emits suffix-free tickers (e.g. `VUAG`), so
staying consistent avoids double-mapping.
- Notes field records the parse-strategy tag ("rfc2822-v2") plus the
matched line for debugging.
Test plan:
poetry run pytest tests/providers/parsers/ -q
→ 3 passed in 0.03s
poetry run mypy broker_sync/providers/parsers/invest_engine.py tests/providers/parsers/test_invest_engine.py
→ Success: no issues found in 2 source files
poetry run ruff check broker_sync/providers/parsers/invest_engine.py tests/providers/parsers/test_invest_engine.py
→ All checks passed!
poetry run yapf --diff broker_sync/providers/parsers/invest_engine.py tests/providers/parsers/test_invest_engine.py
→ clean (no diff)
Manual verification: load the fixture email, call the parser, inspect the
returned Activity has symbol=VUAG, quantity=59.539562, unit_price=60.46,
date=2023-01-17, external_id starts with invest-engine:.
This commit is contained in:
parent
b363032e42
commit
9ec8ece2d9
4 changed files with 209 additions and 0 deletions
150
broker_sync/providers/parsers/invest_engine.py
Normal file
150
broker_sync/providers/parsers/invest_engine.py
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
"""InvestEngine email parser.
|
||||
|
||||
IE mails the user after each trade batch. The body shape varies — over
|
||||
the years IE has sent trade confirmations as plain-text RFC 2822
|
||||
messages, multipart HTML emails with a summary table, and (for older
|
||||
statements) CSV attachments. The intent is to try each strategy in
order and return the first that yields at least one Activity; so far
only the plain-text line-based strategy (v1/v2 layouts) is implemented
in this module.
|
||||
|
||||
Every parse strategy produces canonical `Activity` objects with:
|
||||
- `account_id = "invest-engine-primary"` (sink remaps to Wealthfolio UUID)
|
||||
- `account_type = AccountType.ISA` (Viktor's IE account is an ISA)
|
||||
- `currency = "GBP"`
|
||||
- `external_id = f"invest-engine:{fingerprint}"` where fingerprint hashes
|
||||
(date, symbol, quantity, unit_price) for deterministic dedup.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import email
|
||||
import hashlib
|
||||
from datetime import datetime
|
||||
from decimal import Decimal
|
||||
from email.message import Message
|
||||
|
||||
from broker_sync.models import AccountType, Activity, ActivityType
|
||||
|
||||
# Placeholder account identifier set as `account_id` on every Activity built in this module.
_ACCOUNT_ID = "invest-engine-primary"
|
||||
# Currency symbol the line parsers split on to locate the unit price in the trade line.
_CURRENCY_SIGN = "£"
|
||||
|
||||
|
||||
def parse_invest_engine_email(raw_email: bytes) -> list[Activity]:
    """Turn a raw IE trade confirmation email into Activity records.

    The bytes are parsed as an email message, the text body is extracted
    and handed to the line-based (v1/v2) parser.

    Returns an empty list when the message has no usable text body or when
    the line-based parser finds no match.
    """
    message = email.message_from_bytes(raw_email)
    text = _extract_text_body(message)
    return _parse_rfc2822_lines(text) if text is not None else []
|
||||
|
||||
|
||||
def _extract_text_body(msg: Message) -> str | None:
|
||||
"""Return the text/plain body of an email, or None if absent."""
|
||||
if msg.is_multipart():
|
||||
for part in msg.walk():
|
||||
if part.get_content_type() == "text/plain":
|
||||
payload = part.get_payload(decode=True)
|
||||
if isinstance(payload, bytes):
|
||||
return payload.decode(part.get_content_charset() or "utf-8", errors="replace")
|
||||
return None
|
||||
payload = msg.get_payload(decode=True)
|
||||
if isinstance(payload, bytes):
|
||||
return payload.decode(msg.get_content_charset() or "utf-8", errors="replace")
|
||||
if isinstance(payload, str):
|
||||
return payload
|
||||
return None
|
||||
|
||||
|
||||
def _parse_rfc2822_lines(body: str) -> list[Activity]:
    """Run the line-based layout parsers over *body*, newest layout first.

    The v2 parser is tried before the v1 parser, and evaluation stops at
    the first one that returns an Activity. The result is a one-element
    list holding that Activity, or `[]` when neither layout matches.
    """
    # Generator keeps the attempts lazy: v1 only runs if v2 returned None.
    attempts = (attempt(body) for attempt in (_try_v2, _try_v1))
    first_hit = next((hit for hit in attempts if hit is not None), None)
    return [] if first_hit is None else [first_hit]
|
||||
|
||||
|
||||
def _try_v2(body: str) -> Activity | None:
|
||||
"""Parse body with v2 layout: `Date: DD Month` on line 2, year on line 3."""
|
||||
lines = body.splitlines()
|
||||
if len(lines) < 6:
|
||||
return None
|
||||
try:
|
||||
day_str, month = lines[2].split()[-2:]
|
||||
year = lines[3].split()[0]
|
||||
on_date = datetime.strptime(f"{day_str}-{month}-{year}", "%d-%B-%Y")
|
||||
symbol = lines[4].split(":")[1].split()[0].strip()
|
||||
unit_price = Decimal(lines[4].split(_CURRENCY_SIGN)[1].split()[0])
|
||||
quantity = Decimal(lines[4].split("Bought")[1].split()[0])
|
||||
except (ValueError, IndexError):
|
||||
return None
|
||||
return _build_activity(
|
||||
on_date=on_date,
|
||||
symbol=symbol,
|
||||
quantity=quantity,
|
||||
unit_price=unit_price,
|
||||
strategy="rfc2822-v2",
|
||||
matched=lines[4],
|
||||
)
|
||||
|
||||
|
||||
def _try_v1(body: str) -> Activity | None:
|
||||
"""Parse body with v1 layout: `Date: DD` on line 2, `Month YYYY` on line 3."""
|
||||
lines = body.splitlines()
|
||||
if len(lines) < 6:
|
||||
return None
|
||||
try:
|
||||
day = int(lines[2].split("Date: ")[1])
|
||||
month, year = (lines[3].split(" ")[0]).split()
|
||||
on_date = datetime.strptime(f"{day}-{month}-{year}", "%d-%B-%Y")
|
||||
symbol = lines[4].split(":")[1].split()[0].strip()
|
||||
quantity = Decimal(lines[4].split("Bought")[1].split()[0])
|
||||
price_str = lines[4].split("Bought")[1].split("@")[1].split()[0].split(_CURRENCY_SIGN)[1]
|
||||
unit_price = Decimal(price_str)
|
||||
except (ValueError, IndexError):
|
||||
return None
|
||||
return _build_activity(
|
||||
on_date=on_date,
|
||||
symbol=symbol,
|
||||
quantity=quantity,
|
||||
unit_price=unit_price,
|
||||
strategy="rfc2822-v1",
|
||||
matched=lines[4],
|
||||
)
|
||||
|
||||
|
||||
def _build_activity(
    *,
    on_date: datetime,
    symbol: str,
    quantity: Decimal,
    unit_price: Decimal,
    strategy: str,
    matched: str,
) -> Activity:
    """Assemble the canonical BUY Activity for one parsed trade.

    The external id is ``invest-engine:`` followed by the fingerprint of
    (date, symbol, quantity, unit price). Account id, ISA account type and
    GBP currency are fixed. The notes field carries the strategy tag in
    square brackets followed by the stripped matched line.
    """
    trade_hash = _fingerprint(on_date, symbol, quantity, unit_price)
    external_id = f"invest-engine:{trade_hash}"
    debug_note = f"[{strategy}] {matched.strip()}"
    return Activity(
        external_id=external_id,
        account_id=_ACCOUNT_ID,
        account_type=AccountType.ISA,
        date=on_date,
        activity_type=ActivityType.BUY,
        currency="GBP",
        symbol=symbol,
        quantity=quantity,
        unit_price=unit_price,
        notes=debug_note,
    )
|
||||
|
||||
|
||||
def _fingerprint(date: datetime, symbol: str, quantity: Decimal, unit_price: Decimal) -> str:
|
||||
key = f"{date.isoformat()}|{symbol}|{quantity}|{unit_price}"
|
||||
return hashlib.sha256(key.encode("utf-8")).hexdigest()[:16]
|
||||
15
tests/fixtures/invest_engine/rfc2822_v2_single_buy.eml
vendored
Normal file
15
tests/fixtures/invest_engine/rfc2822_v2_single_buy.eml
vendored
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
From: InvestEngine <no-reply@investengine.com>
|
||||
To: viktorbarzin@example.com
|
||||
Subject: Your portfolio has been updated
|
||||
Date: Tue, 17 Jan 2023 14:48:00 +0000
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
<https://investengine.com/> We've executed your orders and your
|
||||
portfolio has been updated Client name: Redacted Trading
|
||||
venue: London Stock Exchange Type: Market Order(s) Date: 17 January
|
||||
2023 Here's a summary of the trades we've made for you
|
||||
Vanguard S&P 500: VUAG Bought 59.539562 @ £60.46 per share Total:
|
||||
£3600.00 ISIN: IE00BFMXXD54, Order ID: 199510/2163746, Traded at
|
||||
2:48pm GMT/UTC Take me to my updated portfolio
|
||||
0
tests/providers/parsers/__init__.py
Normal file
0
tests/providers/parsers/__init__.py
Normal file
44
tests/providers/parsers/test_invest_engine.py
Normal file
44
tests/providers/parsers/test_invest_engine.py
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from decimal import Decimal
|
||||
from pathlib import Path
|
||||
|
||||
from broker_sync.models import AccountType, ActivityType
|
||||
from broker_sync.providers.parsers.invest_engine import parse_invest_engine_email
|
||||
|
||||
# Directory holding the InvestEngine fixtures: three levels up from this file, then fixtures/invest_engine.
_FIXTURES = Path(__file__).parent.parent.parent / "fixtures" / "invest_engine"
|
||||
|
||||
|
||||
def _load(name: str) -> bytes:
    """Read the raw bytes of the fixture file *name* under ``_FIXTURES``."""
    fixture_path = _FIXTURES.joinpath(name)
    return fixture_path.read_bytes()
|
||||
|
||||
|
||||
# -- RFC 2822 body (v2-style, single BUY) --
|
||||
|
||||
|
||||
def test_rfc2822_single_buy_parses_to_one_activity() -> None:
    """The v2 fixture parses to exactly one GBP ISA BUY of VUAG with the expected figures."""
    raw = _load("rfc2822_v2_single_buy.eml")
    parsed = parse_invest_engine_email(raw)
    assert len(parsed) == 1

    buy = parsed[0]
    # Trade details taken from the fixture's trade line and date.
    assert buy.activity_type is ActivityType.BUY
    assert buy.symbol == "VUAG"
    assert buy.quantity == Decimal("59.539562")
    assert buy.unit_price == Decimal("60.46")
    assert buy.currency == "GBP"
    assert buy.date == datetime(2023, 1, 17)
    # Fixed account attributes applied by the parser.
    assert buy.account_id == "invest-engine-primary"
    assert buy.account_type is AccountType.ISA
|
||||
|
||||
|
||||
def test_rfc2822_external_id_is_deterministic() -> None:
    """Parsing the same fixture twice yields the same `invest-engine:`-prefixed external id."""
    fixture = "rfc2822_v2_single_buy.eml"
    first = parse_invest_engine_email(_load(fixture))[0]
    second = parse_invest_engine_email(_load(fixture))[0]
    assert first.external_id == second.external_id
    assert first.external_id.startswith("invest-engine:")
|
||||
|
||||
|
||||
def test_rfc2822_notes_record_parse_strategy() -> None:
    """The notes field of the parsed Activity is set and contains the `rfc2822` strategy tag."""
    parsed = parse_invest_engine_email(_load("rfc2822_v2_single_buy.eml"))
    activity = parsed[0]
    assert activity.notes is not None
    assert "rfc2822" in activity.notes
|
||||
Loading…
Add table
Add a link
Reference in a new issue