From abf9fa7cb54728594bae368b6558994a780f3573 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Wed, 27 May 2026 09:40:56 +0000 Subject: [PATCH] parsers/schwab: drop dead vest-release path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The _parse_vest_release path and _VEST_*_RE regexes never matched a real email in 4 years of inbox history (2022-08 → 2026-05, 188 Schwab emails surveyed). Schwab Stock Plan Services does not email release confirmations to the employee address for the workplace account — only the sell-to-cover trade-executed alert lands. Vest data must come from the META payslip via payslip-ingest (tracked as code-fqgr). Removed: - _VEST_SUBJECT_RE + 5 _VEST_*_RE regexes (heuristic, never validated) - _parse_vest_release function - VestParseResult dataclass - parse_schwab_email_full wrapper - _search_group helper (only used by vest path) - 3 dead tests + the _VEST_RELEASE fixture Kept models.VestEvent — the payslip→Wealthfolio sink in code-fqgr will need it. Co-Authored-By: Claude Opus 4.7 --- broker_sync/providers/parsers/schwab.py | 193 ++---------------------- tests/providers/parsers/test_schwab.py | 56 ------- 2 files changed, 13 insertions(+), 236 deletions(-) diff --git a/broker_sync/providers/parsers/schwab.py b/broker_sync/providers/parsers/schwab.py index aeef7d0..762a613 100644 --- a/broker_sync/providers/parsers/schwab.py +++ b/broker_sync/providers/parsers/schwab.py @@ -1,79 +1,37 @@ """Schwab workplace-RSU email parser. -Two email shapes are handled: +Schwab Stock Plan Services sends a "Your trade was executed" email for +each sell-to-cover trade (and any user-initiated trade) on the workplace +account. The body has five `` +cells holding date / direction / quantity / ticker / price. -1. Trade confirmations (sell-to-cover or user-initiated trades): HTML - with five `` cells - holding date / direction / quantity / ticker / price. → one Activity. +It does NOT email vest-release / Release Confirmation messages to the +employee address for this account (verified against 4 years of inbox +history, 2022-2026 — see infra/docs in code-fqgr). Vest data must come +from the META payslip via payslip-ingest, not from email. The whole +vest-release parser that used to live here was dead code. -2. Release Confirmations (RSU vest events): subject/body mentions - "Release Confirmation" or "Award Vesting"; body lists vest date, - shares released, FMV, shares sold to cover, and USD tax withheld. - → (Activity, Activity, VestEvent) tuple: the gross vest (BUY at FMV), - the sell-to-cover (SELL at FMV), and a standalone VestEvent for the - payslip-ingest reconciliation pipeline. - -On any parse failure we return the neutral empty result (no Activities, -no VestEvent) — an unparseable email shouldn't crash the IMAP batch. +On any parse failure we return an empty list — an unparseable email +shouldn't crash the IMAP batch. """ from __future__ import annotations import logging -import re -from dataclasses import dataclass from decimal import Decimal, InvalidOperation from bs4 import BeautifulSoup from dateutil import parser as dateparser -from broker_sync.models import AccountType, Activity, ActivityType, VestEvent +from broker_sync.models import AccountType, Activity, ActivityType log = logging.getLogger(__name__) _ACCOUNT_ID = "schwab-workplace" _DEFAULT_CURRENCY = "USD" -# Vest-confirmation emails reliably include one of these phrases. Matching -# is case-insensitive and on the raw HTML (cheap — no DOM parse needed). -_VEST_SUBJECT_RE = re.compile(r"Release Confirmation|Award Vesting|RSU Release", - re.IGNORECASE) - - -@dataclass -class VestParseResult: - activities: list[Activity] - vest_event: VestEvent | None - def parse_schwab_email(raw_html: str) -> list[Activity]: - """Return a single-item list of Activity on success, empty on failure. - - For vest-confirmation emails, returns the two Activity rows (gross - vest + sell-to-cover). Use `parse_schwab_email_full` when the caller - also needs the VestEvent. - """ - return parse_schwab_email_full(raw_html).activities - - -def parse_schwab_email_full(raw_html: str) -> VestParseResult: - """Full parse — returns activities + optional VestEvent. - - Dispatches: vest-confirmation emails → `_parse_vest_release`; - everything else → the legacy single-row confirmation parser. - """ - if _VEST_SUBJECT_RE.search(raw_html): - result = _parse_vest_release(raw_html) - if result is not None: - return result - log.warning("schwab: detected vest email but could not extract fields; " - "add a real fixture to broker-sync/tests/fixtures/") - return VestParseResult(activities=[], vest_event=None) - - return VestParseResult(activities=_parse_trade_confirmation(raw_html), vest_event=None) - - -def _parse_trade_confirmation(raw_html: str) -> list[Activity]: - """Legacy 5-cell trade confirmation parser.""" + """Return a one-element list of Activity on success, empty on failure.""" try: soup = BeautifulSoup(raw_html, "html.parser") cells = [ @@ -90,8 +48,6 @@ def _parse_trade_confirmation(raw_html: str) -> list[Activity]: direction = (ActivityType.SELL if direction_txt.strip().lower() == "sold" else ActivityType.BUY) quantity = Decimal(qty_txt.replace(",", "").strip()) - # Price like "$123.45" — strip the currency sign and parse the numeric tail. - # Handle "£", "€", "USD", etc. by taking the last numeric span. price_clean = price_txt for sign in ("$", "£", "€", "USD", "GBP", "EUR"): price_clean = price_clean.replace(sign, "") @@ -115,126 +71,3 @@ def _parse_trade_confirmation(raw_html: str) -> list[Activity]: ] except (ValueError, InvalidOperation, IndexError, AttributeError): return [] - - -# Heuristic extractors for vest-release emails. Labels observed in public -# Schwab RSU release samples; real fixture needed to tighten these. -_VEST_DATE_RE = re.compile( - r"(?:Release Date|Vest Date|Vesting Date)\s*[:<][^0-9]*" - r"(\d{1,2}[\s/\-][A-Za-z]{3}[\s/\-]\d{2,4}|\d{2}/\d{2}/\d{4}|\d{4}-\d{2}-\d{2})", - re.IGNORECASE) -_VEST_TICKER_RE = re.compile(r"(?:Ticker|Symbol)\s*[:<]\s*([A-Z]{2,5})", - re.IGNORECASE) -_VEST_SHARES_RELEASED_RE = re.compile( - r"(?:Shares Released|Total Shares (?:Released|Vested))\s*[:<]\s*" - r"([\d,]+(?:\.\d+)?)", - re.IGNORECASE) -_VEST_SHARES_WITHHELD_RE = re.compile( - r"(?:Shares (?:Withheld|Sold)(?: for Taxes)?)\s*[:<]\s*" - r"([\d,]+(?:\.\d+)?)", - re.IGNORECASE) -_VEST_FMV_RE = re.compile( - r"(?:Market Price|FMV|Fair Market Value)\s*[:<]\s*" - r"\$?\s*([\d,]+(?:\.\d+)?)", - re.IGNORECASE) -_VEST_TAX_USD_RE = re.compile( - r"(?:Tax Withholding Amount|Total Tax Withholding|Tax Withheld)\s*[:<]\s*" - r"\$?\s*([\d,]+(?:\.\d+)?)", - re.IGNORECASE) - - -def _parse_vest_release(raw_html: str) -> VestParseResult | None: - """Best-effort extraction from a Schwab Release Confirmation email. - - Runs label regexes on the plain-text view of the HTML. Returns None - (signalling fall-through) if the core four fields (date, ticker, - shares released, FMV) don't all resolve — that's a strong signal the - heuristics need a real fixture before they can be trusted on a live - email. - """ - try: - soup = BeautifulSoup(raw_html, "html.parser") - text = soup.get_text(" ", strip=True) - except Exception: - return None - - date_str = _search_group(_VEST_DATE_RE, text) - ticker = _search_group(_VEST_TICKER_RE, text) - shares_released_str = _search_group(_VEST_SHARES_RELEASED_RE, text) - fmv_str = _search_group(_VEST_FMV_RE, text) - if not (date_str and ticker and shares_released_str and fmv_str): - return None - - try: - vest_date = dateparser.parse(date_str) - shares_vested = Decimal(shares_released_str.replace(",", "")) - fmv = Decimal(fmv_str.replace(",", "")) - except (ValueError, InvalidOperation): - return None - - shares_sold_str = _search_group(_VEST_SHARES_WITHHELD_RE, text) - shares_sold_to_cover = (Decimal(shares_sold_str.replace(",", "")) - if shares_sold_str else None) - tax_usd_str = _search_group(_VEST_TAX_USD_RE, text) - tax_withheld_usd = (Decimal(tax_usd_str.replace(",", "")) - if tax_usd_str else None) - - external_id = (f"schwab:{vest_date.date().isoformat()}:{ticker}:VEST:" - f"{shares_vested}") - - vest_event = VestEvent( - external_id=external_id, - vest_date=vest_date, - ticker=ticker, - shares_vested=shares_vested, - shares_sold_to_cover=shares_sold_to_cover, - fmv_at_vest_usd=fmv, - tax_withheld_usd=tax_withheld_usd, - source="schwab_email", - raw={ - "date": date_str, - "ticker": ticker, - "shares_released": shares_released_str, - "fmv": fmv_str, - "shares_withheld": shares_sold_str or "", - "tax_withheld": tax_usd_str or "", - }, - ) - - # Sibling Activities for Wealthfolio: full vest as BUY, sell-to-cover - # slice as SELL, both at the same FMV so net cash = 0 on that day. - activities: list[Activity] = [ - Activity( - external_id=f"{external_id}:BUY", - account_id=_ACCOUNT_ID, - account_type=AccountType.GIA, - date=vest_date, - activity_type=ActivityType.BUY, - symbol=ticker, - quantity=shares_vested, - unit_price=fmv, - currency=_DEFAULT_CURRENCY, - notes="schwab-vest-release", - ) - ] - if shares_sold_to_cover is not None and shares_sold_to_cover > 0: - activities.append( - Activity( - external_id=f"{external_id}:SELL_TO_COVER", - account_id=_ACCOUNT_ID, - account_type=AccountType.GIA, - date=vest_date, - activity_type=ActivityType.SELL, - symbol=ticker, - quantity=shares_sold_to_cover, - unit_price=fmv, - currency=_DEFAULT_CURRENCY, - notes="schwab-sell-to-cover", - )) - - return VestParseResult(activities=activities, vest_event=vest_event) - - -def _search_group(pattern: re.Pattern[str], text: str) -> str | None: - m = pattern.search(text) - return m.group(1).strip() if m else None diff --git a/tests/providers/parsers/test_schwab.py b/tests/providers/parsers/test_schwab.py index c39bd0c..8e3c736 100644 --- a/tests/providers/parsers/test_schwab.py +++ b/tests/providers/parsers/test_schwab.py @@ -82,59 +82,3 @@ def test_price_with_commas_parses() -> None: html = _SELL.replace("$612.34", "$1,612.34") a = parse_schwab_email(html)[0] assert a.unit_price == Decimal("1612.34") - - -# --- Vest-release parsing ------------------------------------------------- - -_VEST_RELEASE = """ -

Release Confirmation

-

-Release Date: 15 Mar 2026 -Ticker: META -Total Shares Released: 100.0 -Market Price: $612.34 -Shares Withheld for Taxes: 45 -Tax Withholding Amount: $27,555.30 -

-""" - - -def test_vest_release_returns_two_activities_and_vest_event() -> None: - """Release Confirmation yields a BUY (full vest) + SELL (sell-to-cover) + VestEvent.""" - from broker_sync.providers.parsers.schwab import parse_schwab_email_full - - result = parse_schwab_email_full(_VEST_RELEASE) - assert result.vest_event is not None - assert result.vest_event.ticker == "META" - assert result.vest_event.shares_vested == Decimal("100.0") - assert result.vest_event.shares_sold_to_cover == Decimal("45") - assert result.vest_event.fmv_at_vest_usd == Decimal("612.34") - assert result.vest_event.tax_withheld_usd == Decimal("27555.30") - assert result.vest_event.vest_date.date().isoformat() == "2026-03-15" - assert result.vest_event.external_id.startswith("schwab:2026-03-15:META:VEST:") - - assert len(result.activities) == 2 - buy = result.activities[0] - assert buy.activity_type is ActivityType.BUY - assert buy.quantity == Decimal("100.0") - sell = result.activities[1] - assert sell.activity_type is ActivityType.SELL - assert sell.quantity == Decimal("45") - assert sell.unit_price == Decimal("612.34") - - -def test_vest_email_with_unparseable_body_returns_empty() -> None: - """Subject says Release Confirmation but fields missing → empty result, no crash.""" - from broker_sync.providers.parsers.schwab import parse_schwab_email_full - - html = "Release Confirmation — please contact support" - result = parse_schwab_email_full(html) - assert result.vest_event is None - assert result.activities == [] - - -def test_back_compat_parse_schwab_email_drops_vest_event() -> None: - """The legacy list[Activity] shape remains stable for existing callers.""" - acts = parse_schwab_email(_VEST_RELEASE) - assert len(acts) == 2 - assert all(isinstance(a.activity_type, ActivityType) for a in acts)