parsers/schwab: drop dead vest-release path
The _parse_vest_release path and the _VEST_*_RE regexes never matched a real email in 4 years of inbox history (2022-08 → 2026-05, 188 Schwab emails surveyed). Schwab Stock Plan Services does not email release confirmations to the employee address for the workplace account — only the sell-to-cover trade-executed alert lands. Vest data must come from the META payslip via payslip-ingest (tracked as code-fqgr).

Removed:
- _VEST_SUBJECT_RE + 6 _VEST_*_RE regexes (heuristic, never validated)
- _parse_vest_release function
- VestParseResult dataclass
- parse_schwab_email_full wrapper
- _search_group helper (only used by the vest path)
- 3 dead tests + the _VEST_RELEASE fixture

Kept models.VestEvent — the payslip→Wealthfolio sink in code-fqgr will need it.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
bb9e0d4567
commit
abf9fa7cb5
2 changed files with 13 additions and 236 deletions
|
|
@ -1,79 +1,37 @@
|
|||
"""Schwab workplace-RSU email parser.
|
||||
|
||||
Two email shapes are handled:
|
||||
Schwab Stock Plan Services sends a "Your trade was executed" email for
|
||||
each sell-to-cover trade (and any user-initiated trade) on the workplace
|
||||
account. The body has five `<td class="dark-background-body" align="right">`
|
||||
cells holding date / direction / quantity / ticker / price.
|
||||
|
||||
1. Trade confirmations (sell-to-cover or user-initiated trades): HTML
|
||||
with five `<td class="dark-background-body" align="right">` cells
|
||||
holding date / direction / quantity / ticker / price. → one Activity.
|
||||
It does NOT email vest-release / Release Confirmation messages to the
|
||||
employee address for this account (verified against 4 years of inbox
|
||||
history, 2022-2026 — see infra/docs in code-fqgr). Vest data must come
|
||||
from the META payslip via payslip-ingest, not from email. The whole
|
||||
vest-release parser that used to live here was dead code.
|
||||
|
||||
2. Release Confirmations (RSU vest events): subject/body mentions
|
||||
"Release Confirmation" or "Award Vesting"; body lists vest date,
|
||||
shares released, FMV, shares sold to cover, and USD tax withheld.
|
||||
→ (Activity, Activity, VestEvent) tuple: the gross vest (BUY at FMV),
|
||||
the sell-to-cover (SELL at FMV), and a standalone VestEvent for the
|
||||
payslip-ingest reconciliation pipeline.
|
||||
|
||||
On any parse failure we return the neutral empty result (no Activities,
|
||||
no VestEvent) — an unparseable email shouldn't crash the IMAP batch.
|
||||
On any parse failure we return an empty list — an unparseable email
|
||||
shouldn't crash the IMAP batch.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from decimal import Decimal, InvalidOperation
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from dateutil import parser as dateparser
|
||||
|
||||
from broker_sync.models import AccountType, Activity, ActivityType, VestEvent
|
||||
from broker_sync.models import AccountType, Activity, ActivityType
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
_ACCOUNT_ID = "schwab-workplace"
|
||||
_DEFAULT_CURRENCY = "USD"
|
||||
|
||||
# Vest-confirmation emails reliably include one of these phrases. Matching
|
||||
# is case-insensitive and on the raw HTML (cheap — no DOM parse needed).
|
||||
_VEST_SUBJECT_RE = re.compile(r"Release Confirmation|Award Vesting|RSU Release",
|
||||
re.IGNORECASE)
|
||||
|
||||
|
||||
@dataclass
class VestParseResult:
    """Outcome of parsing one Schwab email.

    Bundles the Activity rows extracted from the email with the optional
    VestEvent that accompanies them.
    """

    # Activity rows extracted from the email; empty when nothing was parsed.
    activities: list[Activity]
    # Set only when a vest release was extracted; None otherwise.
    vest_event: VestEvent | None
|
||||
|
||||
|
||||
def parse_schwab_email(raw_html: str) -> list[Activity]:
|
||||
"""Return a single-item list of Activity on success, empty on failure.
|
||||
|
||||
For vest-confirmation emails, returns the two Activity rows (gross
|
||||
vest + sell-to-cover). Use `parse_schwab_email_full` when the caller
|
||||
also needs the VestEvent.
|
||||
"""
|
||||
return parse_schwab_email_full(raw_html).activities
|
||||
|
||||
|
||||
def parse_schwab_email_full(raw_html: str) -> VestParseResult:
    """Parse a Schwab email into activities plus an optional VestEvent.

    Emails whose raw HTML matches `_VEST_SUBJECT_RE` go to
    `_parse_vest_release`; all others go to `_parse_trade_confirmation`
    and never carry a VestEvent.

    Returns:
        The vest parser's result when it succeeds; an empty result (no
        activities, no VestEvent) when a vest email was detected but its
        fields could not be extracted; otherwise the trade-confirmation
        activities with `vest_event=None`.
    """
    looks_like_vest = _VEST_SUBJECT_RE.search(raw_html) is not None
    if not looks_like_vest:
        return VestParseResult(
            activities=_parse_trade_confirmation(raw_html),
            vest_event=None,
        )

    parsed = _parse_vest_release(raw_html)
    if parsed is None:
        # Detected as a vest email but the heuristics failed: log and return
        # the neutral empty result rather than raising.
        log.warning("schwab: detected vest email but could not extract fields; "
                    "add a real fixture to broker-sync/tests/fixtures/")
        return VestParseResult(activities=[], vest_event=None)
    return parsed
|
||||
|
||||
|
||||
def _parse_trade_confirmation(raw_html: str) -> list[Activity]:
|
||||
"""Legacy 5-cell trade confirmation parser."""
|
||||
"""Return a one-element list of Activity on success, empty on failure."""
|
||||
try:
|
||||
soup = BeautifulSoup(raw_html, "html.parser")
|
||||
cells = [
|
||||
|
|
@ -90,8 +48,6 @@ def _parse_trade_confirmation(raw_html: str) -> list[Activity]:
|
|||
direction = (ActivityType.SELL
|
||||
if direction_txt.strip().lower() == "sold" else ActivityType.BUY)
|
||||
quantity = Decimal(qty_txt.replace(",", "").strip())
|
||||
# Price like "$123.45" — strip the currency sign and parse the numeric tail.
|
||||
# Handle "£", "€", "USD", etc. by taking the last numeric span.
|
||||
price_clean = price_txt
|
||||
for sign in ("$", "£", "€", "USD", "GBP", "EUR"):
|
||||
price_clean = price_clean.replace(sign, "")
|
||||
|
|
@ -115,126 +71,3 @@ def _parse_trade_confirmation(raw_html: str) -> list[Activity]:
|
|||
]
|
||||
except (ValueError, InvalidOperation, IndexError, AttributeError):
|
||||
return []
|
||||
|
||||
|
||||
# Heuristic extractors for vest-release emails. Labels observed in public
|
||||
# Schwab RSU release samples; real fixture needed to tighten these.
|
||||
_VEST_DATE_RE = re.compile(
|
||||
r"(?:Release Date|Vest Date|Vesting Date)\s*[:<][^0-9]*"
|
||||
r"(\d{1,2}[\s/\-][A-Za-z]{3}[\s/\-]\d{2,4}|\d{2}/\d{2}/\d{4}|\d{4}-\d{2}-\d{2})",
|
||||
re.IGNORECASE)
|
||||
_VEST_TICKER_RE = re.compile(r"(?:Ticker|Symbol)\s*[:<]\s*([A-Z]{2,5})",
|
||||
re.IGNORECASE)
|
||||
_VEST_SHARES_RELEASED_RE = re.compile(
|
||||
r"(?:Shares Released|Total Shares (?:Released|Vested))\s*[:<]\s*"
|
||||
r"([\d,]+(?:\.\d+)?)",
|
||||
re.IGNORECASE)
|
||||
_VEST_SHARES_WITHHELD_RE = re.compile(
|
||||
r"(?:Shares (?:Withheld|Sold)(?: for Taxes)?)\s*[:<]\s*"
|
||||
r"([\d,]+(?:\.\d+)?)",
|
||||
re.IGNORECASE)
|
||||
_VEST_FMV_RE = re.compile(
|
||||
r"(?:Market Price|FMV|Fair Market Value)\s*[:<]\s*"
|
||||
r"\$?\s*([\d,]+(?:\.\d+)?)",
|
||||
re.IGNORECASE)
|
||||
_VEST_TAX_USD_RE = re.compile(
|
||||
r"(?:Tax Withholding Amount|Total Tax Withholding|Tax Withheld)\s*[:<]\s*"
|
||||
r"\$?\s*([\d,]+(?:\.\d+)?)",
|
||||
re.IGNORECASE)
|
||||
|
||||
|
||||
def _decimal_or_none(value: str | None) -> Decimal | None:
    """Convert a captured numeric string (thousands commas allowed) to Decimal.

    Returns None when the value is missing/empty or is not a valid number
    once commas are removed (e.g. a comma-only capture).
    """
    if not value:
        return None
    try:
        return Decimal(value.replace(",", ""))
    except InvalidOperation:
        return None


def _parse_vest_release(raw_html: str) -> VestParseResult | None:
    """Best-effort extraction from a Schwab Release Confirmation email.

    Runs the label regexes on the plain-text view of the HTML.

    Args:
        raw_html: Raw HTML body of the email.

    Returns:
        A VestParseResult holding the VestEvent plus its sibling Activity
        rows (a BUY for the full vest and, when a positive withheld share
        count was found, a SELL for the sell-to-cover slice). Returns None
        (signalling fall-through) when any of the four core fields (date,
        ticker, shares released, FMV) is missing or cannot be converted.

    The optional fields (shares withheld, tax withheld) degrade to None when
    absent or malformed. Previously a malformed optional value raised
    InvalidOperation out of this function instead of being tolerated.
    """
    try:
        soup = BeautifulSoup(raw_html, "html.parser")
        text = soup.get_text(" ", strip=True)
    except Exception:
        # Best-effort: any failure building the text view means "not parseable".
        return None

    date_str = _search_group(_VEST_DATE_RE, text)
    ticker = _search_group(_VEST_TICKER_RE, text)
    shares_released_str = _search_group(_VEST_SHARES_RELEASED_RE, text)
    fmv_str = _search_group(_VEST_FMV_RE, text)
    if not (date_str and ticker and shares_released_str and fmv_str):
        return None

    try:
        vest_date = dateparser.parse(date_str)
    except (ValueError, OverflowError):
        return None

    shares_vested = _decimal_or_none(shares_released_str)
    fmv = _decimal_or_none(fmv_str)
    if shares_vested is None or fmv is None:
        return None

    # Optional fields: the raw captured text is still recorded in `raw` below
    # even when it cannot be converted to a number.
    shares_sold_str = _search_group(_VEST_SHARES_WITHHELD_RE, text)
    shares_sold_to_cover = _decimal_or_none(shares_sold_str)
    tax_usd_str = _search_group(_VEST_TAX_USD_RE, text)
    tax_withheld_usd = _decimal_or_none(tax_usd_str)

    external_id = (f"schwab:{vest_date.date().isoformat()}:{ticker}:VEST:"
                   f"{shares_vested}")

    vest_event = VestEvent(
        external_id=external_id,
        vest_date=vest_date,
        ticker=ticker,
        shares_vested=shares_vested,
        shares_sold_to_cover=shares_sold_to_cover,
        fmv_at_vest_usd=fmv,
        tax_withheld_usd=tax_withheld_usd,
        source="schwab_email",
        raw={
            "date": date_str,
            "ticker": ticker,
            "shares_released": shares_released_str,
            "fmv": fmv_str,
            "shares_withheld": shares_sold_str or "",
            "tax_withheld": tax_usd_str or "",
        },
    )

    # Sibling Activities for Wealthfolio: full vest as BUY, sell-to-cover
    # slice as SELL, both at the same FMV so net cash = 0 on that day.
    activities: list[Activity] = [
        Activity(
            external_id=f"{external_id}:BUY",
            account_id=_ACCOUNT_ID,
            account_type=AccountType.GIA,
            date=vest_date,
            activity_type=ActivityType.BUY,
            symbol=ticker,
            quantity=shares_vested,
            unit_price=fmv,
            currency=_DEFAULT_CURRENCY,
            notes="schwab-vest-release",
        )
    ]
    if shares_sold_to_cover is not None and shares_sold_to_cover > 0:
        activities.append(
            Activity(
                external_id=f"{external_id}:SELL_TO_COVER",
                account_id=_ACCOUNT_ID,
                account_type=AccountType.GIA,
                date=vest_date,
                activity_type=ActivityType.SELL,
                symbol=ticker,
                quantity=shares_sold_to_cover,
                unit_price=fmv,
                currency=_DEFAULT_CURRENCY,
                notes="schwab-sell-to-cover",
            ))

    return VestParseResult(activities=activities, vest_event=vest_event)
|
||||
|
||||
|
||||
def _search_group(pattern: re.Pattern[str], text: str) -> str | None:
|
||||
m = pattern.search(text)
|
||||
return m.group(1).strip() if m else None
|
||||
|
|
|
|||
|
|
@ -82,59 +82,3 @@ def test_price_with_commas_parses() -> None:
|
|||
html = _SELL.replace("$612.34", "$1,612.34")
|
||||
a = parse_schwab_email(html)[0]
|
||||
assert a.unit_price == Decimal("1612.34")
|
||||
|
||||
|
||||
# --- Vest-release parsing -------------------------------------------------
|
||||
|
||||
_VEST_RELEASE = """<html><body>
|
||||
<h2>Release Confirmation</h2>
|
||||
<p>
|
||||
Release Date: 15 Mar 2026
|
||||
Ticker: META
|
||||
Total Shares Released: 100.0
|
||||
Market Price: $612.34
|
||||
Shares Withheld for Taxes: 45
|
||||
Tax Withholding Amount: $27,555.30
|
||||
</p>
|
||||
</body></html>"""
|
||||
|
||||
|
||||
def test_vest_release_returns_two_activities_and_vest_event() -> None:
    """A Release Confirmation produces a full-vest BUY, a sell-to-cover SELL, and a VestEvent."""
    from broker_sync.providers.parsers.schwab import parse_schwab_email_full

    parsed = parse_schwab_email_full(_VEST_RELEASE)

    # The VestEvent carries every field present in the fixture.
    event = parsed.vest_event
    assert event is not None
    assert event.ticker == "META"
    assert event.shares_vested == Decimal("100.0")
    assert event.shares_sold_to_cover == Decimal("45")
    assert event.fmv_at_vest_usd == Decimal("612.34")
    assert event.tax_withheld_usd == Decimal("27555.30")
    assert event.vest_date.date().isoformat() == "2026-03-15"
    assert event.external_id.startswith("schwab:2026-03-15:META:VEST:")

    # Gross vest first, sell-to-cover second, both priced at the FMV.
    assert len(parsed.activities) == 2
    gross_vest, sell_to_cover = parsed.activities
    assert gross_vest.activity_type is ActivityType.BUY
    assert gross_vest.quantity == Decimal("100.0")
    assert sell_to_cover.activity_type is ActivityType.SELL
    assert sell_to_cover.quantity == Decimal("45")
    assert sell_to_cover.unit_price == Decimal("612.34")
|
||||
|
||||
|
||||
def test_vest_email_with_unparseable_body_returns_empty() -> None:
    """A body that mentions Release Confirmation but lacks the fields gives an empty result, not a crash."""
    from broker_sync.providers.parsers.schwab import parse_schwab_email_full

    parsed = parse_schwab_email_full(
        "<html><body>Release Confirmation — please contact support</body></html>"
    )

    assert parsed.vest_event is None
    assert parsed.activities == []
|
||||
|
||||
|
||||
def test_back_compat_parse_schwab_email_drops_vest_event() -> None:
    """parse_schwab_email still returns a plain list of Activity for a vest email."""
    activities = parse_schwab_email(_VEST_RELEASE)

    assert len(activities) == 2
    for activity in activities:
        assert isinstance(activity.activity_type, ActivityType)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue