diff --git a/broker_sync/providers/parsers/invest_engine.py b/broker_sync/providers/parsers/invest_engine.py
index 7c8a494..2ab4a34 100644
--- a/broker_sync/providers/parsers/invest_engine.py
+++ b/broker_sync/providers/parsers/invest_engine.py
@@ -16,11 +16,13 @@ Every parse strategy produces canonical `Activity` objects with:
 
 from __future__ import annotations
 
+import csv
 import email
 import hashlib
+import io
 import re
 from datetime import datetime
-from decimal import Decimal
+from decimal import Decimal, InvalidOperation
 from email.message import Message
 
 from bs4 import BeautifulSoup
@@ -48,8 +50,9 @@ _DATE_RE = re.compile(
 def parse_invest_engine_email(raw_email: bytes) -> list[Activity]:
     """Parse an IE trade confirmation email into Activity records.
 
-    Tries RFC 2822 body lines first, then HTML tables. Returns an empty
-    list when nothing matches — never raises on malformed input.
+    Tries RFC 2822 body lines first, then HTML tables, then a CSV
+    attachment. Returns an empty list when nothing matches — never
+    raises on malformed input.
     """
     msg = email.message_from_bytes(raw_email)
     text_body = _extract_part_body(msg, "text/plain")
@@ -62,6 +65,9 @@ def parse_invest_engine_email(raw_email: bytes) -> list[Activity]:
         activities = _parse_html_tables(html_body)
         if activities:
             return activities
+    csv_activities = _parse_csv_attachment(raw_email)
+    if csv_activities:
+        return csv_activities
     return []
 
 
@@ -223,6 +229,111 @@ def _extract_html_symbol(nested: object) -> str | None:
     return None
 
 
+_CSV_CONTENT_TYPES = {"text/csv", "application/csv", "application/vnd.ms-excel"}
+# Required columns for the CSV attachment strategy. IE has not (yet) sent
+# CSV-attached statements in production — the column set here mirrors the
+# upstream _extract_positions_csv contract (ticker, buy_price, num_shares,
+# buy_date, currency) with modern names.
+_CSV_COLUMNS = {"ticker", "unit_price", "quantity", "date", "currency"}
+
+
+def _parse_csv_attachment(raw_email: bytes) -> list[Activity]:
+    """Parse a CSV attachment from the email into Activity records.
+
+    Walks every MIME part, picks the first one with a CSV-ish content
+    type OR a `.csv` filename, and iterates its rows. Rows missing a
+    required column or with an unparseable number/date are skipped.
+    Never raises on malformed input: a structurally broken CSV yields
+    only the rows that were read before the breakage.
+    """
+    msg = email.message_from_bytes(raw_email)
+    csv_text = _extract_csv_attachment_text(msg)
+    if csv_text is None:
+        return []
+    reader = csv.DictReader(io.StringIO(csv_text))
+    activities: list[Activity] = []
+    try:
+        # `fieldnames` reads the header lazily, so it belongs in the try.
+        if not _CSV_COLUMNS.issubset(reader.fieldnames or ()):
+            return []
+        for row in reader:
+            activity = _csv_row_to_activity(row)
+            if activity is not None:
+                activities.append(activity)
+    except csv.Error:
+        # Raised while reading when the text is not well-formed CSV
+        # (e.g. bare-CR line endings); keep the rows read so far.
+        return activities
+    return activities
+
+
+def _extract_csv_attachment_text(msg: Message) -> str | None:
+    """Return the decoded text of the first CSV-looking part, or None.
+
+    A leading byte-order mark is dropped so that the first header cell
+    still matches its column name.
+    """
+    for part in msg.walk():
+        if not _looks_like_csv_part(part):
+            continue
+        payload = part.get_payload(decode=True)
+        if isinstance(payload, bytes):
+            charset = part.get_content_charset() or "utf-8"
+            try:
+                text = payload.decode(charset, errors="replace")
+            except LookupError:
+                # Unknown charset label in the MIME header: fall back.
+                text = payload.decode("utf-8", errors="replace")
+            return text.lstrip("\ufeff")
+        if isinstance(payload, str):
+            return payload.lstrip("\ufeff")
+    return None
+
+
+def _looks_like_csv_part(part: Message) -> bool:
+    """Tell whether a MIME part is declared as CSV by type or by filename."""
+    if part.get_content_type() in _CSV_CONTENT_TYPES:
+        return True
+    filename = part.get_filename()
+    return isinstance(filename, str) and filename.lower().endswith(".csv")
+
+
+def _csv_row_to_activity(row: dict[str, str]) -> Activity | None:
+    """Convert one CSV row into an Activity, or None when it is unusable.
+
+    A row is unusable when a required cell is absent, the date is not
+    `YYYY-MM-DD`, a number is unparseable or non-finite, the ticker is
+    blank, or the currency is anything other than GBP (a blank currency
+    cell defaults to GBP).
+    """
+    # csv.DictReader pads short rows with None, so a column listed in the
+    # header can still be missing from an individual row.
+    if any(row.get(column) is None for column in _CSV_COLUMNS):
+        return None
+    date_text = row["date"].strip()
+    try:
+        on_date = datetime.strptime(date_text, "%Y-%m-%d")
+        quantity = Decimal(row["quantity"])
+        unit_price = Decimal(row["unit_price"])
+    except (ValueError, InvalidOperation):
+        return None
+    symbol = row["ticker"].strip()
+    currency = row["currency"].strip().upper() or "GBP"
+    if not symbol or currency != "GBP":
+        return None
+    # Decimal accepts "NaN" and "Infinity"; neither is a real trade figure.
+    if not (quantity.is_finite() and unit_price.is_finite()):
+        return None
+    return _build_activity(
+        on_date=on_date,
+        symbol=symbol,
+        quantity=quantity,
+        unit_price=unit_price,
+        strategy="csv",
+        matched=f"{symbol},{unit_price},{quantity},{date_text}",
+    )
+
+
 def _build_activity(
     *,
     on_date: datetime,
diff --git a/tests/fixtures/invest_engine/csv_attachment.eml b/tests/fixtures/invest_engine/csv_attachment.eml
new file mode 100644
index 0000000..b247c00
--- /dev/null
+++ b/tests/fixtures/invest_engine/csv_attachment.eml
@@ -0,0 +1,22 @@
+From: InvestEngine
+To: viktorbarzin@example.com
+Subject: Your InvestEngine statement
+Date: Mon, 07 Apr 2025 09:00:00 +0000
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary="----=_MIXED_1"
+
+------=_MIXED_1
+Content-Type: text/plain; charset=UTF-8
+
+Your monthly statement is attached as a CSV.
+
+------=_MIXED_1
+Content-Type: text/csv; charset=UTF-8; name="statement.csv"
+Content-Disposition: attachment; filename="statement.csv"
+
+ticker,unit_price,quantity,date,currency
+VUAG,63.21,12.5,2025-04-02,GBP
+SWDA,86.40,4.75,2025-04-03,GBP
+VUSA,90.10,1.0,2025-04-04,GBP
+
+------=_MIXED_1--
diff --git a/tests/providers/parsers/test_invest_engine.py b/tests/providers/parsers/test_invest_engine.py
index d397bda..8ef81d3 100644
--- a/tests/providers/parsers/test_invest_engine.py
+++ b/tests/providers/parsers/test_invest_engine.py
@@ -68,3 +68,53 @@ def test_html_notes_record_html_strategy() -> None:
     a = parse_invest_engine_email(_load("html_two_orders.eml"))[0]
     assert a.notes is not None
     assert "html" in a.notes
+
+
+# -- CSV attachment body --
+
+
+def test_csv_attachment_parses_all_rows() -> None:
+    activities = parse_invest_engine_email(_load("csv_attachment.eml"))
+    assert len(activities) == 3
+    by_symbol = {a.symbol: a for a in activities}
+    assert by_symbol["VUAG"].quantity == Decimal("12.5")
+    assert by_symbol["VUAG"].unit_price == Decimal("63.21")
+    assert by_symbol["VUAG"].date == datetime(2025, 4, 2)
+    assert by_symbol["SWDA"].quantity == Decimal("4.75")
+    assert by_symbol["VUSA"].date == datetime(2025, 4, 4)
+    for a in activities:
+        assert a.activity_type is ActivityType.BUY
+        assert a.currency == "GBP"
+        assert a.account_id == "invest-engine-primary"
+        assert a.account_type is AccountType.ISA
+        assert a.notes is not None
+        assert "csv" in a.notes
+
+
+def test_csv_attachment_skips_malformed_rows() -> None:
+    raw = "\n".join(
+        [
+            "From: InvestEngine",
+            "Subject: Your InvestEngine statement",
+            "MIME-Version: 1.0",
+            'Content-Type: multipart/mixed; boundary="B"',
+            "",
+            "--B",
+            "Content-Type: text/plain; charset=UTF-8",
+            "",
+            "Your monthly statement is attached as a CSV.",
+            "",
+            "--B",
+            'Content-Type: text/csv; charset=UTF-8; name="statement.csv"',
+            "",
+            "ticker,unit_price,quantity,date,currency",
+            "VUAG,63.21",
+            "SWDA,not-a-number,4.75,2025-04-03,GBP",
+            "VUSA,90.10,1.0,2025-04-04,GBP",
+            "",
+            "--B--",
+            "",
+        ]
+    ).encode()
+    activities = parse_invest_engine_email(raw)
+    assert [a.symbol for a in activities] == ["VUSA"]