diff --git a/broker_sync/models.py b/broker_sync/models.py index 17eff39..dd1be88 100644 --- a/broker_sync/models.py +++ b/broker_sync/models.py @@ -102,3 +102,27 @@ def _fmt(v: Decimal | None) -> str: if v is None: return "" return format(v, "f") + + +@dataclass +class VestEvent: + """Schwab RSU vest event — written to payslip_ingest.rsu_vest_events. + + Carries both the gross vest (shares x FMV) and the sell-to-cover portion + (shares withheld for tax x FMV). Sibling Activity records (one BUY for + the full vest, one SELL for the sold-to-cover slice) are produced + separately for Wealthfolio. + + USD-only at parse time; FX conversion happens at the postgres sink via + the ECB daily rate so the DB row carries both the raw USD figures and + the GBP-translated values for dashboard joins. + """ + external_id: str # schwab:{date}:{ticker}:VEST:{shares_vested} + vest_date: datetime + ticker: str + shares_vested: Decimal + shares_sold_to_cover: Decimal | None + fmv_at_vest_usd: Decimal + tax_withheld_usd: Decimal | None + source: str = "schwab_email" + raw: dict[str, str] = field(default_factory=dict) diff --git a/broker_sync/providers/parsers/schwab.py b/broker_sync/providers/parsers/schwab.py index fe5f5f3..aeef7d0 100644 --- a/broker_sync/providers/parsers/schwab.py +++ b/broker_sync/providers/parsers/schwab.py @@ -1,37 +1,79 @@ """Schwab workplace-RSU email parser. -Schwab sends HTML transaction-confirmation emails with the core fields in -five `` elements: -1. Trade date (human format — e.g. "Jan 23, 2025") -2. Direction word ("Sold" for SELL; anything else is BUY) -3. Quantity (share count, float) -4. Ticker -5. Price ("$123.45" — currency-sign-prefixed) +Two email shapes are handled: -One email → one Activity. On any parse failure we return an empty list -(same as the original finance/ behaviour — an unparseable email shouldn't -crash the whole IMAP batch). +1. 
Trade confirmations (sell-to-cover or user-initiated trades): HTML + with five `` cells + holding date / direction / quantity / ticker / price. → one Activity. -Ported from finance/position/provider/schwab/message_parser.py (39 lines). -Dropped: per-row timestamp id suffix (we use ISO date + ticker + qty which -is stable across re-pulls), currency-from-sign hackery (US Schwab is USD- -only in practice — if that ever changes we'll add FX on parse). +2. Release Confirmations (RSU vest events): subject/body mentions + "Release Confirmation" or "Award Vesting"; body lists vest date, + shares released, FMV, shares sold to cover, and USD tax withheld. + → (Activity, Activity, VestEvent) tuple: the gross vest (BUY at FMV), + the sell-to-cover (SELL at FMV), and a standalone VestEvent for the + payslip-ingest reconciliation pipeline. + +On any parse failure we return the neutral empty result (no Activities, +no VestEvent) — an unparseable email shouldn't crash the IMAP batch. """ from __future__ import annotations +import logging +import re +from dataclasses import dataclass from decimal import Decimal, InvalidOperation from bs4 import BeautifulSoup from dateutil import parser as dateparser -from broker_sync.models import AccountType, Activity, ActivityType +from broker_sync.models import AccountType, Activity, ActivityType, VestEvent + +log = logging.getLogger(__name__) _ACCOUNT_ID = "schwab-workplace" _DEFAULT_CURRENCY = "USD" +# Vest-confirmation emails reliably include one of these phrases. Matching +# is case-insensitive and on the raw HTML (cheap — no DOM parse needed). 
_VEST_SUBJECT_RE = re.compile(r"Release Confirmation|Award Vesting|RSU Release",
                              re.IGNORECASE)


@dataclass
class VestParseResult:
    """Outcome of parsing one Schwab email.

    ``activities`` is empty when nothing could be parsed. ``vest_event`` is
    set only for vest-release emails whose fields were extracted.
    """
    activities: list[Activity]
    vest_event: VestEvent | None


def parse_schwab_email(raw_html: str) -> list[Activity]:
    """Return the Activity rows parsed from one email, empty on failure.

    Trade confirmations yield a single Activity. Vest-confirmation emails
    yield the gross-vest BUY plus, when present, the sell-to-cover SELL.
    Use `parse_schwab_email_full` when the caller also needs the VestEvent.
    """
    return parse_schwab_email_full(raw_html).activities


def parse_schwab_email_full(raw_html: str) -> VestParseResult:
    """Full parse — returns activities + optional VestEvent.

    Emails that mention a vest phrase are tried with `_parse_vest_release`
    first. The phrases are matched anywhere in the raw HTML, so a genuine
    trade confirmation can mention them too; when vest extraction fails the
    email is therefore handed to the legacy trade-confirmation parser
    instead of being dropped. A warning is logged only when neither parser
    produced anything.
    """
    looks_like_vest = _VEST_SUBJECT_RE.search(raw_html) is not None
    if looks_like_vest:
        result = _parse_vest_release(raw_html)
        if result is not None:
            return result

    activities = _parse_trade_confirmation(raw_html)
    if looks_like_vest and not activities:
        log.warning("schwab: detected vest email but could not extract fields; "
                    "add a real fixture to broker-sync/tests/fixtures/")
    return VestParseResult(activities=activities, vest_event=None)


# NOTE(review): `_parse_trade_confirmation` (the legacy 5-cell parser) is only
# partially visible in this chunk, so it is intentionally not reproduced here.


# Heuristic extractors for vest-release emails. Labels observed in public
# Schwab RSU release samples; real fixture needed to tighten these.
_VEST_DATE_RE = re.compile(
    r"(?:Release Date|Vest Date|Vesting Date)\s*[:<][^0-9]*"
    r"(\d{1,2}[\s/\-][A-Za-z]{3}[\s/\-]\d{2,4}|\d{2}/\d{2}/\d{4}|\d{4}-\d{2}-\d{2})",
    re.IGNORECASE)
# The trailing \b stops the capture from grabbing the first letters of an
# ordinary word (e.g. "Symbol: Company" must not yield "Compa").
_VEST_TICKER_RE = re.compile(r"(?:Ticker|Symbol)\s*[:<]\s*([A-Z]{2,5})\b",
                             re.IGNORECASE)
_VEST_SHARES_RELEASED_RE = re.compile(
    r"(?:Shares Released|Total Shares (?:Released|Vested))\s*[:<]\s*"
    r"([\d,]+(?:\.\d+)?)",
    re.IGNORECASE)
_VEST_SHARES_WITHHELD_RE = re.compile(
    r"(?:Shares (?:Withheld|Sold)(?: for Taxes)?)\s*[:<]\s*"
    r"([\d,]+(?:\.\d+)?)",
    re.IGNORECASE)
_VEST_FMV_RE = re.compile(
    r"(?:Market Price|FMV|Fair Market Value)\s*[:<]\s*"
    r"\$?\s*([\d,]+(?:\.\d+)?)",
    re.IGNORECASE)
_VEST_TAX_USD_RE = re.compile(
    r"(?:Tax Withholding Amount|Total Tax Withholding|Tax Withheld)\s*[:<]\s*"
    r"\$?\s*([\d,]+(?:\.\d+)?)",
    re.IGNORECASE)


def _parse_decimal(value: str | None) -> Decimal | None:
    """Convert a captured number such as "27,555.30" to a Decimal.

    Returns None when the value is missing or is not a valid number (the
    number regexes accept comma-only captures such as ",").
    """
    if not value:
        return None
    try:
        return Decimal(value.replace(",", ""))
    except InvalidOperation:
        return None


def _parse_vest_release(raw_html: str) -> VestParseResult | None:
    """Best-effort extraction from a Schwab Release Confirmation email.

    Runs label regexes on the plain-text view of the HTML. Returns None if
    any of the four core fields (date, ticker, shares released, FMV) is
    missing or unparseable. The optional fields (shares sold to cover, tax
    withheld) become None when absent or malformed; they never raise.

    On success the result holds a VestEvent plus a BUY Activity for the
    full vest and, when shares were sold to cover, a SELL Activity.
    """
    try:
        soup = BeautifulSoup(raw_html, "html.parser")
        text = soup.get_text(" ", strip=True)
    except Exception:
        # Deliberately broad: one malformed email must not abort the batch.
        return None

    date_str = _search_group(_VEST_DATE_RE, text)
    ticker = _search_group(_VEST_TICKER_RE, text)
    shares_released_str = _search_group(_VEST_SHARES_RELEASED_RE, text)
    fmv_str = _search_group(_VEST_FMV_RE, text)
    if not (date_str and ticker and shares_released_str and fmv_str):
        return None

    try:
        vest_date = dateparser.parse(date_str)
    except (ValueError, OverflowError):
        # dateutil raises ParserError (a ValueError) or OverflowError.
        return None
    shares_vested = _parse_decimal(shares_released_str)
    fmv = _parse_decimal(fmv_str)
    if shares_vested is None or fmv is None:
        return None

    # The ticker regex is case-insensitive; normalise so ids and symbols do
    # not depend on how the email happened to capitalise it.
    ticker = ticker.upper()

    shares_sold_str = _search_group(_VEST_SHARES_WITHHELD_RE, text)
    shares_sold_to_cover = _parse_decimal(shares_sold_str)
    tax_usd_str = _search_group(_VEST_TAX_USD_RE, text)
    tax_withheld_usd = _parse_decimal(tax_usd_str)

    external_id = (f"schwab:{vest_date.date().isoformat()}:{ticker}:VEST:"
                   f"{shares_vested}")

    vest_event = VestEvent(
        external_id=external_id,
        vest_date=vest_date,
        ticker=ticker,
        shares_vested=shares_vested,
        shares_sold_to_cover=shares_sold_to_cover,
        fmv_at_vest_usd=fmv,
        tax_withheld_usd=tax_withheld_usd,
        source="schwab_email",
        raw={
            "date": date_str,
            "ticker": ticker,
            "shares_released": shares_released_str,
            "fmv": fmv_str,
            "shares_withheld": shares_sold_str or "",
            "tax_withheld": tax_usd_str or "",
        },
    )

    # Sibling Activities for Wealthfolio: full vest as BUY, sell-to-cover
    # slice as SELL, both priced at the same FMV.
    activities: list[Activity] = [
        Activity(
            external_id=f"{external_id}:BUY",
            account_id=_ACCOUNT_ID,
            account_type=AccountType.GIA,
            date=vest_date,
            activity_type=ActivityType.BUY,
            symbol=ticker,
            quantity=shares_vested,
            unit_price=fmv,
            currency=_DEFAULT_CURRENCY,
            notes="schwab-vest-release",
        )
    ]
    if shares_sold_to_cover is not None and shares_sold_to_cover > 0:
        activities.append(
            Activity(
                external_id=f"{external_id}:SELL_TO_COVER",
                account_id=_ACCOUNT_ID,
                account_type=AccountType.GIA,
                date=vest_date,
                activity_type=ActivityType.SELL,
                symbol=ticker,
                quantity=shares_sold_to_cover,
                unit_price=fmv,
                currency=_DEFAULT_CURRENCY,
                notes="schwab-sell-to-cover",
            ))

    return VestParseResult(activities=activities, vest_event=vest_event)


def _search_group(pattern: re.Pattern[str], text: str) -> str | None:
    """Return the stripped first capture group of *pattern* in *text*, or None."""
    m = pattern.search(text)
    return m.group(1).strip() if m else None

# --- Vest-release parsing -------------------------------------------------

# NOTE(review): the HTML tags of this fixture were lost when the chunk was
# extracted; the markup below is a reconstruction that keeps the visible
# text unchanged — confirm the tag names against the real fixture.
_VEST_RELEASE = """
<html>
<body>
<h1>Release Confirmation</h1>
<div>
Release Date: 15 Mar 2026
Ticker: META
Total Shares Released: 100.0
Market Price: $612.34
Shares Withheld for Taxes: 45
Tax Withholding Amount: $27,555.30
</div>
</body>
</html>
"""


def test_vest_release_returns_two_activities_and_vest_event() -> None:
    """Release Confirmation yields a BUY (full vest) + SELL (sell-to-cover) + VestEvent."""
    from broker_sync.providers.parsers.schwab import parse_schwab_email_full

    result = parse_schwab_email_full(_VEST_RELEASE)
    event = result.vest_event
    assert event is not None
    assert event.ticker == "META"
    assert event.shares_vested == Decimal("100.0")
    assert event.shares_sold_to_cover == Decimal("45")
    assert event.fmv_at_vest_usd == Decimal("612.34")
    assert event.tax_withheld_usd == Decimal("27555.30")
    assert event.vest_date.date().isoformat() == "2026-03-15"
    assert event.external_id.startswith("schwab:2026-03-15:META:VEST:")

    assert len(result.activities) == 2
    buy, sell = result.activities
    assert buy.activity_type is ActivityType.BUY
    assert buy.quantity == Decimal("100.0")
    assert buy.unit_price == Decimal("612.34")
    assert buy.external_id == f"{event.external_id}:BUY"
    assert sell.activity_type is ActivityType.SELL
    assert sell.quantity == Decimal("45")
    assert sell.unit_price == Decimal("612.34")
    assert sell.external_id == f"{event.external_id}:SELL_TO_COVER"


def test_vest_email_with_unparseable_body_returns_empty() -> None:
    """Subject says Release Confirmation but fields missing → empty result, no crash."""
    from broker_sync.providers.parsers.schwab import parse_schwab_email_full

    html = "Release Confirmation — please contact support"
    result = parse_schwab_email_full(html)
    assert result.vest_event is None
    assert result.activities == []


def test_back_compat_parse_schwab_email_drops_vest_event() -> None:
    """The legacy list[Activity] shape remains stable for existing callers."""
    from broker_sync.providers.parsers.schwab import parse_schwab_email_full

    acts = parse_schwab_email(_VEST_RELEASE)
    assert len(acts) == 2
    assert all(isinstance(a.activity_type, ActivityType) for a in acts)
    # The legacy entry point must return exactly the full parser's activities.
    full = parse_schwab_email_full(_VEST_RELEASE).activities
    assert [a.external_id for a in acts] == [a.external_id for a in full]