parsers/schwab: drop dead vest-release path
Some checks are pending
CI / test (push) Waiting to run
CI / build (push) Blocked by required conditions
CI / deploy (push) Blocked by required conditions
ci/woodpecker/push/build Pipeline was successful

The _parse_vest_release path and _VEST_*_RE regexes never matched a
real email in 4 years of inbox history (2022-08 → 2026-05, 188 Schwab
emails surveyed). Schwab Stock Plan Services does not email release
confirmations to the employee address for the workplace account — only
the sell-to-cover trade-executed alert lands. Vest data must come from
the META payslip via payslip-ingest (tracked as code-fqgr).

Removed:
- _VEST_SUBJECT_RE + 5 _VEST_*_RE regexes (heuristic, never validated)
- _parse_vest_release function
- VestParseResult dataclass
- parse_schwab_email_full wrapper
- _search_group helper (only used by vest path)
- 3 dead tests + the _VEST_RELEASE fixture

Kept models.VestEvent — the payslip→Wealthfolio sink in code-fqgr will
need it.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-05-27 09:40:56 +00:00
parent bb9e0d4567
commit abf9fa7cb5
2 changed files with 13 additions and 236 deletions

View file

@ -1,79 +1,37 @@
"""Schwab workplace-RSU email parser.
Two email shapes are handled:
Schwab Stock Plan Services sends a "Your trade was executed" email for
each sell-to-cover trade (and any user-initiated trade) on the workplace
account. The body has five `<td class="dark-background-body" align="right">`
cells holding date / direction / quantity / ticker / price.
1. Trade confirmations (sell-to-cover or user-initiated trades): HTML
with five `<td class="dark-background-body" align="right">` cells
holding date / direction / quantity / ticker / price. one Activity.
It does NOT email vest-release / Release Confirmation messages to the
employee address for this account (verified against 4 years of inbox
history, 2022-2026 see infra/docs in code-fqgr). Vest data must come
from the META payslip via payslip-ingest, not from email. The whole
vest-release parser that used to live here was dead code.
2. Release Confirmations (RSU vest events): subject/body mentions
"Release Confirmation" or "Award Vesting"; body lists vest date,
shares released, FMV, shares sold to cover, and USD tax withheld.
(Activity, Activity, VestEvent) tuple: the gross vest (BUY at FMV),
the sell-to-cover (SELL at FMV), and a standalone VestEvent for the
payslip-ingest reconciliation pipeline.
On any parse failure we return the neutral empty result (no Activities,
no VestEvent) an unparseable email shouldn't crash the IMAP batch.
On any parse failure we return an empty list an unparseable email
shouldn't crash the IMAP batch.
"""
from __future__ import annotations
import logging
import re
from dataclasses import dataclass
from decimal import Decimal, InvalidOperation
from bs4 import BeautifulSoup
from dateutil import parser as dateparser
from broker_sync.models import AccountType, Activity, ActivityType, VestEvent
from broker_sync.models import AccountType, Activity, ActivityType
log = logging.getLogger(__name__)
_ACCOUNT_ID = "schwab-workplace"
_DEFAULT_CURRENCY = "USD"
# Vest-confirmation emails reliably include one of these phrases. Matching
# is case-insensitive and on the raw HTML (cheap — no DOM parse needed).
_VEST_SUBJECT_RE = re.compile(r"Release Confirmation|Award Vesting|RSU Release",
re.IGNORECASE)
@dataclass
class VestParseResult:
activities: list[Activity]
vest_event: VestEvent | None
def parse_schwab_email(raw_html: str) -> list[Activity]:
"""Return a single-item list of Activity on success, empty on failure.
For vest-confirmation emails, returns the two Activity rows (gross
vest + sell-to-cover). Use `parse_schwab_email_full` when the caller
also needs the VestEvent.
"""
return parse_schwab_email_full(raw_html).activities
def parse_schwab_email_full(raw_html: str) -> VestParseResult:
"""Full parse — returns activities + optional VestEvent.
Dispatches: vest-confirmation emails `_parse_vest_release`;
everything else the legacy single-row confirmation parser.
"""
if _VEST_SUBJECT_RE.search(raw_html):
result = _parse_vest_release(raw_html)
if result is not None:
return result
log.warning("schwab: detected vest email but could not extract fields; "
"add a real fixture to broker-sync/tests/fixtures/")
return VestParseResult(activities=[], vest_event=None)
return VestParseResult(activities=_parse_trade_confirmation(raw_html), vest_event=None)
def _parse_trade_confirmation(raw_html: str) -> list[Activity]:
"""Legacy 5-cell trade confirmation parser."""
"""Return a one-element list of Activity on success, empty on failure."""
try:
soup = BeautifulSoup(raw_html, "html.parser")
cells = [
@ -90,8 +48,6 @@ def _parse_trade_confirmation(raw_html: str) -> list[Activity]:
direction = (ActivityType.SELL
if direction_txt.strip().lower() == "sold" else ActivityType.BUY)
quantity = Decimal(qty_txt.replace(",", "").strip())
# Price like "$123.45" — strip the currency sign and parse the numeric tail.
# Handle "£", "€", "USD", etc. by taking the last numeric span.
price_clean = price_txt
for sign in ("$", "£", "", "USD", "GBP", "EUR"):
price_clean = price_clean.replace(sign, "")
@ -115,126 +71,3 @@ def _parse_trade_confirmation(raw_html: str) -> list[Activity]:
]
except (ValueError, InvalidOperation, IndexError, AttributeError):
return []
# Heuristic extractors for vest-release emails. Labels observed in public
# Schwab RSU release samples; real fixture needed to tighten these.
_VEST_DATE_RE = re.compile(
r"(?:Release Date|Vest Date|Vesting Date)\s*[:<][^0-9]*"
r"(\d{1,2}[\s/\-][A-Za-z]{3}[\s/\-]\d{2,4}|\d{2}/\d{2}/\d{4}|\d{4}-\d{2}-\d{2})",
re.IGNORECASE)
_VEST_TICKER_RE = re.compile(r"(?:Ticker|Symbol)\s*[:<]\s*([A-Z]{2,5})",
re.IGNORECASE)
_VEST_SHARES_RELEASED_RE = re.compile(
r"(?:Shares Released|Total Shares (?:Released|Vested))\s*[:<]\s*"
r"([\d,]+(?:\.\d+)?)",
re.IGNORECASE)
_VEST_SHARES_WITHHELD_RE = re.compile(
r"(?:Shares (?:Withheld|Sold)(?: for Taxes)?)\s*[:<]\s*"
r"([\d,]+(?:\.\d+)?)",
re.IGNORECASE)
_VEST_FMV_RE = re.compile(
r"(?:Market Price|FMV|Fair Market Value)\s*[:<]\s*"
r"\$?\s*([\d,]+(?:\.\d+)?)",
re.IGNORECASE)
_VEST_TAX_USD_RE = re.compile(
r"(?:Tax Withholding Amount|Total Tax Withholding|Tax Withheld)\s*[:<]\s*"
r"\$?\s*([\d,]+(?:\.\d+)?)",
re.IGNORECASE)
def _parse_vest_release(raw_html: str) -> VestParseResult | None:
"""Best-effort extraction from a Schwab Release Confirmation email.
Runs label regexes on the plain-text view of the HTML. Returns None
(signalling fall-through) if the core four fields (date, ticker,
shares released, FMV) don't all resolve — that's a strong signal the
heuristics need a real fixture before they can be trusted on a live
email.
"""
try:
soup = BeautifulSoup(raw_html, "html.parser")
text = soup.get_text(" ", strip=True)
except Exception:
return None
date_str = _search_group(_VEST_DATE_RE, text)
ticker = _search_group(_VEST_TICKER_RE, text)
shares_released_str = _search_group(_VEST_SHARES_RELEASED_RE, text)
fmv_str = _search_group(_VEST_FMV_RE, text)
if not (date_str and ticker and shares_released_str and fmv_str):
return None
try:
vest_date = dateparser.parse(date_str)
shares_vested = Decimal(shares_released_str.replace(",", ""))
fmv = Decimal(fmv_str.replace(",", ""))
except (ValueError, InvalidOperation):
return None
shares_sold_str = _search_group(_VEST_SHARES_WITHHELD_RE, text)
shares_sold_to_cover = (Decimal(shares_sold_str.replace(",", ""))
if shares_sold_str else None)
tax_usd_str = _search_group(_VEST_TAX_USD_RE, text)
tax_withheld_usd = (Decimal(tax_usd_str.replace(",", ""))
if tax_usd_str else None)
external_id = (f"schwab:{vest_date.date().isoformat()}:{ticker}:VEST:"
f"{shares_vested}")
vest_event = VestEvent(
external_id=external_id,
vest_date=vest_date,
ticker=ticker,
shares_vested=shares_vested,
shares_sold_to_cover=shares_sold_to_cover,
fmv_at_vest_usd=fmv,
tax_withheld_usd=tax_withheld_usd,
source="schwab_email",
raw={
"date": date_str,
"ticker": ticker,
"shares_released": shares_released_str,
"fmv": fmv_str,
"shares_withheld": shares_sold_str or "",
"tax_withheld": tax_usd_str or "",
},
)
# Sibling Activities for Wealthfolio: full vest as BUY, sell-to-cover
# slice as SELL, both at the same FMV so net cash = 0 on that day.
activities: list[Activity] = [
Activity(
external_id=f"{external_id}:BUY",
account_id=_ACCOUNT_ID,
account_type=AccountType.GIA,
date=vest_date,
activity_type=ActivityType.BUY,
symbol=ticker,
quantity=shares_vested,
unit_price=fmv,
currency=_DEFAULT_CURRENCY,
notes="schwab-vest-release",
)
]
if shares_sold_to_cover is not None and shares_sold_to_cover > 0:
activities.append(
Activity(
external_id=f"{external_id}:SELL_TO_COVER",
account_id=_ACCOUNT_ID,
account_type=AccountType.GIA,
date=vest_date,
activity_type=ActivityType.SELL,
symbol=ticker,
quantity=shares_sold_to_cover,
unit_price=fmv,
currency=_DEFAULT_CURRENCY,
notes="schwab-sell-to-cover",
))
return VestParseResult(activities=activities, vest_event=vest_event)
def _search_group(pattern: re.Pattern[str], text: str) -> str | None:
m = pattern.search(text)
return m.group(1).strip() if m else None

View file

@ -82,59 +82,3 @@ def test_price_with_commas_parses() -> None:
html = _SELL.replace("$612.34", "$1,612.34")
a = parse_schwab_email(html)[0]
assert a.unit_price == Decimal("1612.34")
# --- Vest-release parsing -------------------------------------------------
_VEST_RELEASE = """<html><body>
<h2>Release Confirmation</h2>
<p>
Release Date: 15 Mar 2026
Ticker: META
Total Shares Released: 100.0
Market Price: $612.34
Shares Withheld for Taxes: 45
Tax Withholding Amount: $27,555.30
</p>
</body></html>"""
def test_vest_release_returns_two_activities_and_vest_event() -> None:
"""Release Confirmation yields a BUY (full vest) + SELL (sell-to-cover) + VestEvent."""
from broker_sync.providers.parsers.schwab import parse_schwab_email_full
result = parse_schwab_email_full(_VEST_RELEASE)
assert result.vest_event is not None
assert result.vest_event.ticker == "META"
assert result.vest_event.shares_vested == Decimal("100.0")
assert result.vest_event.shares_sold_to_cover == Decimal("45")
assert result.vest_event.fmv_at_vest_usd == Decimal("612.34")
assert result.vest_event.tax_withheld_usd == Decimal("27555.30")
assert result.vest_event.vest_date.date().isoformat() == "2026-03-15"
assert result.vest_event.external_id.startswith("schwab:2026-03-15:META:VEST:")
assert len(result.activities) == 2
buy = result.activities[0]
assert buy.activity_type is ActivityType.BUY
assert buy.quantity == Decimal("100.0")
sell = result.activities[1]
assert sell.activity_type is ActivityType.SELL
assert sell.quantity == Decimal("45")
assert sell.unit_price == Decimal("612.34")
def test_vest_email_with_unparseable_body_returns_empty() -> None:
"""Subject says Release Confirmation but fields missing → empty result, no crash."""
from broker_sync.providers.parsers.schwab import parse_schwab_email_full
html = "<html><body>Release Confirmation — please contact support</body></html>"
result = parse_schwab_email_full(html)
assert result.vest_event is None
assert result.activities == []
def test_back_compat_parse_schwab_email_drops_vest_event() -> None:
"""The legacy list[Activity] shape remains stable for existing callers."""
acts = parse_schwab_email(_VEST_RELEASE)
assert len(acts) == 2
assert all(isinstance(a.activity_type, ActivityType) for a in acts)