diff --git a/broker_sync/providers/parsers/schwab.py b/broker_sync/providers/parsers/schwab.py new file mode 100644 index 0000000..fe5f5f3 --- /dev/null +++ b/broker_sync/providers/parsers/schwab.py @@ -0,0 +1,75 @@ +"""Schwab workplace-RSU email parser. + +Schwab sends HTML transaction-confirmation emails with the core fields in +five `<td>` elements: +1. Trade date (human format — e.g. "Jan 23, 2025") +2. Direction word ("Sold" for SELL; anything else is BUY) +3. Quantity (share count, possibly fractional) +4. Ticker +5. Price ("$123.45" — currency-sign-prefixed) + +One email → one Activity. On any parse failure we return an empty list +(same as the original finance/ behaviour — an unparseable email shouldn't +crash the whole IMAP batch). + +Ported from finance/position/provider/schwab/message_parser.py (39 lines). +Dropped: per-row timestamp id suffix (we use ISO date + ticker + direction + +qty, which is stable across re-pulls), currency-from-sign hackery (US Schwab +is USD-only in practice — if that ever changes we'll add FX on parse). 
def parse_schwab_email(raw_html: str) -> list[Activity]:
    """Parse one Schwab transaction-confirmation email into an Activity.

    The first five ``<td class="dark-background-body" align="right">`` cells
    are read, in order, as trade date, direction word, quantity, ticker and
    price. A direction word of "sold" (case-insensitive) maps to SELL; any
    other word maps to BUY.

    Args:
        raw_html: HTML body of the email.

    Returns:
        A single-item list holding the parsed Activity, or an empty list when
        fewer than five matching cells are present or any field fails to
        parse. Parse errors are never propagated, so one bad email cannot
        abort the caller's batch.
    """
    try:
        soup = BeautifulSoup(raw_html, "html.parser")
        cells = [
            td.get_text(strip=True)
            for td in soup.find_all(
                "td", {"class": "dark-background-body", "align": "right"}
            )
        ]
        if len(cells) < 5:
            return []

        date_txt, direction_txt, qty_txt, ticker, price_txt = cells[:5]

        # Normalise the ticker once so the id and the symbol can never differ.
        symbol = ticker.strip()
        if not symbol:
            return []

        trade_date = dateparser.parse(date_txt)
        direction = (
            ActivityType.SELL
            if direction_txt.strip().lower() == "sold"
            else ActivityType.BUY
        )
        quantity = Decimal(qty_txt.replace(",", "").strip())

        # Price like "$123.45": drop known currency markers and thousands
        # separators; whatever remains must be a plain decimal literal.
        # NOTE(review): "£"/"€"/"GBP"/"EUR" are stripped as well, yet the
        # currency recorded below is always _DEFAULT_CURRENCY — confirm that
        # non-USD confirmations cannot reach this parser.
        price_clean = price_txt
        for sign in ("$", "£", "€", "USD", "GBP", "EUR"):
            price_clean = price_clean.replace(sign, "")
        unit_price = Decimal(price_clean.replace(",", "").strip())

        # Decimal() accepts "NaN" and "Infinity"; neither is a usable amount.
        if not (quantity.is_finite() and unit_price.is_finite()):
            return []

        external_id = (
            f"schwab:{trade_date.date().isoformat()}:{symbol}:"
            f"{direction.value}:{quantity}"
        )
        return [
            Activity(
                external_id=external_id,
                account_id=_ACCOUNT_ID,
                account_type=AccountType.GIA,
                date=trade_date,
                activity_type=direction,
                symbol=symbol,
                quantity=quantity,
                unit_price=unit_price,
                currency=_DEFAULT_CURRENCY,
                notes=f"schwab-email:{direction_txt}",
            )
        ]
    except (
        ValueError,
        InvalidOperation,
        IndexError,
        AttributeError,
        # dateutil raises OverflowError for dates beyond the platform's range.
        OverflowError,
    ):
        return []
"broker_sync.cli:app" diff --git a/tests/providers/parsers/test_schwab.py b/tests/providers/parsers/test_schwab.py new file mode 100644 index 0000000..8e3c736 --- /dev/null +++ b/tests/providers/parsers/test_schwab.py @@ -0,0 +1,84 @@ +from __future__ import annotations + +from decimal import Decimal + +from broker_sync.models import AccountType, ActivityType +from broker_sync.providers.parsers.schwab import parse_schwab_email + +_SELL = """ + + + + + + + +
DateJan 23, 2025
ActionSold
Quantity100.0
TickerMETA
Price$612.34
+ +""" + +_BUY = """ + + + + + + +
2024-11-15
Bought
5.5
AAPL
$225.00
+""" + +_MALFORMED = "no transaction here" + +_MISSING_CELLS = """ + + + +
Jan 23, 2025
Sold
def test_sell_email_parses_to_one_sell_activity() -> None:
    """A "Sold" confirmation yields exactly one fully populated SELL."""
    parsed = parse_schwab_email(_SELL)
    assert len(parsed) == 1
    activity = parsed[0]
    assert activity.activity_type is ActivityType.SELL
    assert activity.symbol == "META"
    assert activity.quantity == Decimal("100.0")
    assert activity.unit_price == Decimal("612.34")
    assert activity.currency == "USD"
    assert activity.account_id == "schwab-workplace"
    assert activity.account_type is AccountType.GIA
    assert activity.date.date().isoformat() == "2025-01-23"


def test_buy_email_becomes_buy_activity() -> None:
    """Any direction word other than "Sold" is treated as a BUY."""
    parsed = parse_schwab_email(_BUY)
    assert len(parsed) == 1
    activity = parsed[0]
    assert activity.activity_type is ActivityType.BUY
    assert activity.symbol == "AAPL"
    assert activity.quantity == Decimal("5.5")
    assert activity.unit_price == Decimal("225.00")


def test_malformed_email_returns_empty_list() -> None:
    """Input without any matching td cells produces no activities."""
    parsed = parse_schwab_email(_MALFORMED)
    assert parsed == []


def test_missing_cells_returns_empty_list() -> None:
    """With only 2 of the 5 required cells the parser bails out cleanly."""
    parsed = parse_schwab_email(_MISSING_CELLS)
    assert parsed == []


def test_external_id_is_stable_across_reruns() -> None:
    """Parsing the same email twice gives the same deterministic id."""
    first_id = parse_schwab_email(_SELL)[0].external_id
    second_id = parse_schwab_email(_SELL)[0].external_id
    assert first_id == second_id


def test_price_with_commas_parses() -> None:
    """Thousands separators in the price cell are ignored."""
    html_with_commas = _SELL.replace("$612.34", "$1,612.34")
    activity = parse_schwab_email(html_with_commas)[0]
    assert activity.unit_price == Decimal("1612.34")