Add Schwab email parser (port from finance/)
Schwab's workplace-RSU confirmation emails have 5 data td elements
with class='dark-background-body' align='right': date, direction, qty,
ticker, price-with-currency-sign. One email → one Activity.
- parse_schwab_email(raw_html) -> list[Activity] (1-item or empty)
- Empty on any parse failure (IMAP batch shouldn't crash on one bad mail)
- Deterministic external_id ('schwab:date:ticker:type:qty') — stable
across re-pulls so dedup works
- Hardcoded to account 'schwab-workplace' / AccountType.GIA / USD
- 6 unit tests: SELL + BUY happy path, malformed, missing cells,
external-id stability, commas in price
Dropped from the original finance port:
- msg_timestamp-based external id (non-deterministic — would re-import
on every IMAP walk). Replaced with a hash-stable key.
- Currency.from_sign() currency hack. Schwab US is USD-only; we'll add
FX when that changes.
poetry run pytest -q → 109 passed, 1 skipped
poetry run mypy → clean (added types-python-dateutil)
poetry run ruff check → clean
This commit is contained in:
parent
1aa60ce348
commit
f089b8b93a
4 changed files with 173 additions and 1 deletions
75
broker_sync/providers/parsers/schwab.py
Normal file
75
broker_sync/providers/parsers/schwab.py
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
"""Schwab workplace-RSU email parser.
|
||||
|
||||
Schwab sends HTML transaction-confirmation emails with the core fields in
|
||||
five `<td class="dark-background-body" align="right">` elements:
|
||||
1. Trade date (human format — e.g. "Jan 23, 2025")
|
||||
2. Direction word ("Sold" for SELL; anything else is BUY)
|
||||
3. Quantity (share count, float)
|
||||
4. Ticker
|
||||
5. Price ("$123.45" — currency-sign-prefixed)
|
||||
|
||||
One email → one Activity. On any parse failure we return an empty list
|
||||
(same as the original finance/ behaviour — an unparseable email shouldn't
|
||||
crash the whole IMAP batch).
|
||||
|
||||
Ported from finance/position/provider/schwab/message_parser.py (39 lines).
|
||||
Dropped: per-row timestamp id suffix (we use ISO date + ticker + direction + qty, which
|
||||
is stable across re-pulls), currency-from-sign hackery (US Schwab is USD-
|
||||
only in practice — if that ever changes we'll add FX on parse).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from decimal import Decimal, InvalidOperation
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from dateutil import parser as dateparser
|
||||
|
||||
from broker_sync.models import AccountType, Activity, ActivityType
|
||||
|
||||
# Fixed account identifier passed as account_id on every Activity this parser builds.
_ACCOUNT_ID = "schwab-workplace"
|
||||
# Currency code stamped on every Activity; the currency sign in the price cell is
# stripped during parsing and is not used to choose the currency.
_DEFAULT_CURRENCY = "USD"
|
||||
|
||||
|
||||
def parse_schwab_email(raw_html: str) -> list[Activity]:
    """Parse one Schwab confirmation email into at most one Activity.

    The first five ``<td class="dark-background-body" align="right">`` cells
    are read, in document order, as: trade date, direction word, quantity,
    ticker, price. Any further matching cells are ignored.

    Args:
        raw_html: HTML body of the confirmation email.

    Returns:
        A single-item list on success. An empty list when fewer than five
        matching cells are found, when the ticker cell is blank, or when the
        date, quantity or price cannot be parsed. Parse errors are swallowed
        here so that one bad email does not abort the caller's batch.
    """
    try:
        soup = BeautifulSoup(raw_html, "html.parser")
        matches = soup.find_all("td", {
            "class": "dark-background-body",
            "align": "right",
        })
        cells = [td.get_text(strip=True) for td in matches]
        if len(cells) < 5:
            return []

        date_txt, direction_txt, qty_txt, ticker_txt, price_txt = cells[:5]

        # A blank date/qty/price already fails in its parser below; the ticker
        # has no parser, so reject a blank one explicitly instead of emitting
        # an Activity with an empty symbol.
        symbol = ticker_txt.strip()
        if not symbol:
            return []

        trade_date = dateparser.parse(date_txt)

        # Only the exact word "sold" (case-insensitive) maps to SELL; every
        # other direction word is treated as BUY.
        if direction_txt.strip().lower() == "sold":
            direction = ActivityType.SELL
        else:
            direction = ActivityType.BUY

        quantity = Decimal(qty_txt.replace(",", "").strip())

        # Price like "$123.45": remove known currency signs/codes and thousands
        # separators, then parse what is left. Anything else left in the text
        # makes Decimal() raise, which is reported as a parse failure.
        price_clean = price_txt
        for sign in ("$", "£", "€", "USD", "GBP", "EUR"):
            price_clean = price_clean.replace(sign, "")
        unit_price = Decimal(price_clean.replace(",", "").strip())

        # Built only from fields of the email itself, so re-parsing the same
        # email always yields the same id.
        external_id = (f"schwab:{trade_date.date().isoformat()}:{symbol}:"
                       f"{direction.value}:{quantity}")

        return [
            Activity(
                external_id=external_id,
                account_id=_ACCOUNT_ID,
                account_type=AccountType.GIA,
                date=trade_date,
                activity_type=direction,
                symbol=symbol,
                quantity=quantity,
                unit_price=unit_price,
                currency=_DEFAULT_CURRENCY,
                notes=f"schwab-email:{direction_txt}",
            )
        ]
    except (ValueError, InvalidOperation, IndexError, AttributeError, OverflowError):
        # OverflowError: dateutil documents it for date fields that exceed the
        # platform's integer range; it is not a ValueError subclass.
        return []
|
||||
14
poetry.lock
generated
14
poetry.lock
generated
|
|
@ -628,6 +628,18 @@ rich = ">=10.11.0"
|
|||
shellingham = ">=1.3.0"
|
||||
typing-extensions = ">=3.7.4.3"
|
||||
|
||||
[[package]]
|
||||
name = "types-python-dateutil"
|
||||
version = "2.9.0.20260408"
|
||||
description = "Typing stubs for python-dateutil"
|
||||
optional = false
|
||||
python-versions = ">=3.10"
|
||||
groups = ["dev"]
|
||||
files = [
|
||||
{file = "types_python_dateutil-2.9.0.20260408-py3-none-any.whl", hash = "sha256:473139d514a71c9d1fbd8bb328974bedcb1cc3dba57aad04ffa4157f483c216f"},
|
||||
{file = "types_python_dateutil-2.9.0.20260408.tar.gz", hash = "sha256:8b056ec01568674235f64ecbcef928972a5fac412f5aab09c516dfa2acfbb582"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typing-extensions"
|
||||
version = "4.15.0"
|
||||
|
|
@ -658,4 +670,4 @@ platformdirs = ">=3.5.1"
|
|||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = ">=3.11,<3.13"
|
||||
content-hash = "b9c19ac1963682740a98cd539d3790ff180c2e8195d5cfcc9572da855db3fa7d"
|
||||
content-hash = "04a3e24fe45c75f975140aff6076af0a156772a1a8e82eba30ee2345ac1d8bd6"
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ pytest-asyncio = "^0.23"
|
|||
mypy = "^1.11"
|
||||
ruff = "^0.6"
|
||||
yapf = "^0.43"
|
||||
types-python-dateutil = "^2.9.0.20260408"
|
||||
|
||||
[tool.poetry.scripts]
|
||||
broker-sync = "broker_sync.cli:app"
|
||||
|
|
|
|||
84
tests/providers/parsers/test_schwab.py
Normal file
84
tests/providers/parsers/test_schwab.py
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from decimal import Decimal
|
||||
|
||||
from broker_sync.models import AccountType, ActivityType
|
||||
from broker_sync.providers.parsers.schwab import parse_schwab_email
|
||||
|
||||
_SELL = """
|
||||
<html><body>
|
||||
<table>
|
||||
<tr><td>Date</td><td class="dark-background-body" align="right">Jan 23, 2025</td></tr>
|
||||
<tr><td>Action</td><td class="dark-background-body" align="right">Sold</td></tr>
|
||||
<tr><td>Quantity</td><td class="dark-background-body" align="right">100.0</td></tr>
|
||||
<tr><td>Ticker</td><td class="dark-background-body" align="right">META</td></tr>
|
||||
<tr><td>Price</td><td class="dark-background-body" align="right">$612.34</td></tr>
|
||||
</table>
|
||||
</body></html>
|
||||
"""
|
||||
|
||||
_BUY = """
|
||||
<html><body><table>
|
||||
<tr><td class="dark-background-body" align="right">2024-11-15</td></tr>
|
||||
<tr><td class="dark-background-body" align="right">Bought</td></tr>
|
||||
<tr><td class="dark-background-body" align="right">5.5</td></tr>
|
||||
<tr><td class="dark-background-body" align="right">AAPL</td></tr>
|
||||
<tr><td class="dark-background-body" align="right">$225.00</td></tr>
|
||||
</table></body></html>
|
||||
"""
|
||||
|
||||
_MALFORMED = "<html><body>no transaction here</body></html>"
|
||||
|
||||
_MISSING_CELLS = """
|
||||
<html><body><table>
|
||||
<tr><td class="dark-background-body" align="right">Jan 23, 2025</td></tr>
|
||||
<tr><td class="dark-background-body" align="right">Sold</td></tr>
|
||||
</table></body></html>
|
||||
"""
|
||||
|
||||
|
||||
def test_sell_email_parses_to_one_sell_activity() -> None:
    """A "Sold" confirmation yields exactly one fully-populated SELL Activity."""
    parsed = parse_schwab_email(_SELL)
    assert len(parsed) == 1

    sell = parsed[0]

    # Values read from the five email cells.
    assert sell.date.date().isoformat() == "2025-01-23"
    assert sell.activity_type is ActivityType.SELL
    assert sell.quantity == Decimal("100.0")
    assert sell.symbol == "META"
    assert sell.unit_price == Decimal("612.34")

    # Values the parser hardcodes for every Schwab activity.
    assert sell.account_id == "schwab-workplace"
    assert sell.account_type is AccountType.GIA
    assert sell.currency == "USD"
|
||||
|
||||
|
||||
def test_buy_email_becomes_buy_activity() -> None:
    """A direction word other than "Sold" ("Bought" here) is parsed as a BUY."""
    parsed = parse_schwab_email(_BUY)
    assert len(parsed) == 1

    buy = parsed[0]
    assert buy.activity_type is ActivityType.BUY
    assert buy.symbol == "AAPL"
    # Fractional share counts must survive parsing unchanged.
    assert buy.quantity == Decimal("5.5")
    assert buy.unit_price == Decimal("225.00")
|
||||
|
||||
|
||||
def test_malformed_email_returns_empty_list() -> None:
    """HTML with no matching td cells at all produces no activities."""
    parsed = parse_schwab_email(_MALFORMED)
    assert parsed == []
|
||||
|
||||
|
||||
def test_missing_cells_returns_empty_list() -> None:
    """Only 2 of the 5 required cells are present, so the parser must bail cleanly."""
    parsed = parse_schwab_email(_MISSING_CELLS)
    assert parsed == []
|
||||
|
||||
|
||||
def test_external_id_is_stable_across_reruns() -> None:
    """The external_id depends only on the email's own fields.

    Comparing two parses of the same input guards against per-call state
    leaking into the id. Pinning the literal value additionally catches a
    change of key format, which would defeat dedup against ids stored by
    earlier runs.
    """
    first = parse_schwab_email(_SELL)[0]
    second = parse_schwab_email(_SELL)[0]
    assert first.external_id == second.external_id

    # schwab:<ISO trade date>:<ticker>:<direction value>:<quantity as written>
    expected = f"schwab:2025-01-23:META:{ActivityType.SELL.value}:100.0"
    assert first.external_id == expected
|
||||
|
||||
|
||||
def test_price_with_commas_parses() -> None:
    """Thousands separators in the price cell are dropped before parsing."""
    html_with_comma = _SELL.replace("$612.34", "$1,612.34")
    activities = parse_schwab_email(html_with_comma)
    activity = activities[0]
    assert activity.unit_price == Decimal("1612.34")
|
||||
Loading…
Add table
Add a link
Reference in a new issue