parsers/schwab: drop dead vest-release path
The _parse_vest_release path and the _VEST_*_RE regexes never matched a real email in 4 years of inbox history (2022-08 → 2026-05, 188 Schwab emails surveyed). Schwab Stock Plan Services does not email release confirmations to the employee address for the workplace account — only the sell-to-cover trade-executed alert lands. Vest data must come from the META payslip via payslip-ingest (tracked as code-fqgr).

Removed:
- _VEST_SUBJECT_RE + 6 _VEST_*_RE regexes (heuristic, never validated)
- _parse_vest_release function
- VestParseResult dataclass
- parse_schwab_email_full wrapper
- _search_group helper (only used by vest path)
- 3 dead tests + the _VEST_RELEASE fixture

Kept models.VestEvent — the payslip→Wealthfolio sink in code-fqgr will need it.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
bb9e0d4567
commit
abf9fa7cb5
2 changed files with 13 additions and 236 deletions
|
|
@ -1,79 +1,37 @@
|
||||||
"""Schwab workplace-RSU email parser.
|
"""Schwab workplace-RSU email parser.
|
||||||
|
|
||||||
Two email shapes are handled:
|
Schwab Stock Plan Services sends a "Your trade was executed" email for
|
||||||
|
each sell-to-cover trade (and any user-initiated trade) on the workplace
|
||||||
|
account. The body has five `<td class="dark-background-body" align="right">`
|
||||||
|
cells holding date / direction / quantity / ticker / price.
|
||||||
|
|
||||||
1. Trade confirmations (sell-to-cover or user-initiated trades): HTML
|
It does NOT email vest-release / Release Confirmation messages to the
|
||||||
with five `<td class="dark-background-body" align="right">` cells
|
employee address for this account (verified against 4 years of inbox
|
||||||
holding date / direction / quantity / ticker / price. → one Activity.
|
history, 2022-2026 — see infra/docs in code-fqgr). Vest data must come
|
||||||
|
from the META payslip via payslip-ingest, not from email. The whole
|
||||||
|
vest-release parser that used to live here was dead code.
|
||||||
|
|
||||||
2. Release Confirmations (RSU vest events): subject/body mentions
|
On any parse failure we return an empty list — an unparseable email
|
||||||
"Release Confirmation" or "Award Vesting"; body lists vest date,
|
shouldn't crash the IMAP batch.
|
||||||
shares released, FMV, shares sold to cover, and USD tax withheld.
|
|
||||||
→ (Activity, Activity, VestEvent) tuple: the gross vest (BUY at FMV),
|
|
||||||
the sell-to-cover (SELL at FMV), and a standalone VestEvent for the
|
|
||||||
payslip-ingest reconciliation pipeline.
|
|
||||||
|
|
||||||
On any parse failure we return the neutral empty result (no Activities,
|
|
||||||
no VestEvent) — an unparseable email shouldn't crash the IMAP batch.
|
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import re
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from decimal import Decimal, InvalidOperation
|
from decimal import Decimal, InvalidOperation
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from dateutil import parser as dateparser
|
from dateutil import parser as dateparser
|
||||||
|
|
||||||
from broker_sync.models import AccountType, Activity, ActivityType, VestEvent
|
from broker_sync.models import AccountType, Activity, ActivityType
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
_ACCOUNT_ID = "schwab-workplace"
|
_ACCOUNT_ID = "schwab-workplace"
|
||||||
_DEFAULT_CURRENCY = "USD"
|
_DEFAULT_CURRENCY = "USD"
|
||||||
|
|
||||||
# Vest-confirmation emails reliably include one of these phrases. Matching
|
|
||||||
# is case-insensitive and on the raw HTML (cheap — no DOM parse needed).
|
|
||||||
_VEST_SUBJECT_RE = re.compile(r"Release Confirmation|Award Vesting|RSU Release",
|
|
||||||
re.IGNORECASE)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class VestParseResult:
|
|
||||||
activities: list[Activity]
|
|
||||||
vest_event: VestEvent | None
|
|
||||||
|
|
||||||
|
|
||||||
def parse_schwab_email(raw_html: str) -> list[Activity]:
|
def parse_schwab_email(raw_html: str) -> list[Activity]:
|
||||||
"""Return a single-item list of Activity on success, empty on failure.
|
"""Return a one-element list of Activity on success, empty on failure."""
|
||||||
|
|
||||||
For vest-confirmation emails, returns the two Activity rows (gross
|
|
||||||
vest + sell-to-cover). Use `parse_schwab_email_full` when the caller
|
|
||||||
also needs the VestEvent.
|
|
||||||
"""
|
|
||||||
return parse_schwab_email_full(raw_html).activities
|
|
||||||
|
|
||||||
|
|
||||||
def parse_schwab_email_full(raw_html: str) -> VestParseResult:
|
|
||||||
"""Full parse — returns activities + optional VestEvent.
|
|
||||||
|
|
||||||
Dispatches: vest-confirmation emails → `_parse_vest_release`;
|
|
||||||
everything else → the legacy single-row confirmation parser.
|
|
||||||
"""
|
|
||||||
if _VEST_SUBJECT_RE.search(raw_html):
|
|
||||||
result = _parse_vest_release(raw_html)
|
|
||||||
if result is not None:
|
|
||||||
return result
|
|
||||||
log.warning("schwab: detected vest email but could not extract fields; "
|
|
||||||
"add a real fixture to broker-sync/tests/fixtures/")
|
|
||||||
return VestParseResult(activities=[], vest_event=None)
|
|
||||||
|
|
||||||
return VestParseResult(activities=_parse_trade_confirmation(raw_html), vest_event=None)
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_trade_confirmation(raw_html: str) -> list[Activity]:
|
|
||||||
"""Legacy 5-cell trade confirmation parser."""
|
|
||||||
try:
|
try:
|
||||||
soup = BeautifulSoup(raw_html, "html.parser")
|
soup = BeautifulSoup(raw_html, "html.parser")
|
||||||
cells = [
|
cells = [
|
||||||
|
|
@ -90,8 +48,6 @@ def _parse_trade_confirmation(raw_html: str) -> list[Activity]:
|
||||||
direction = (ActivityType.SELL
|
direction = (ActivityType.SELL
|
||||||
if direction_txt.strip().lower() == "sold" else ActivityType.BUY)
|
if direction_txt.strip().lower() == "sold" else ActivityType.BUY)
|
||||||
quantity = Decimal(qty_txt.replace(",", "").strip())
|
quantity = Decimal(qty_txt.replace(",", "").strip())
|
||||||
# Price like "$123.45" — strip the currency sign and parse the numeric tail.
|
|
||||||
# Handle "£", "€", "USD", etc. by taking the last numeric span.
|
|
||||||
price_clean = price_txt
|
price_clean = price_txt
|
||||||
for sign in ("$", "£", "€", "USD", "GBP", "EUR"):
|
for sign in ("$", "£", "€", "USD", "GBP", "EUR"):
|
||||||
price_clean = price_clean.replace(sign, "")
|
price_clean = price_clean.replace(sign, "")
|
||||||
|
|
@ -115,126 +71,3 @@ def _parse_trade_confirmation(raw_html: str) -> list[Activity]:
|
||||||
]
|
]
|
||||||
except (ValueError, InvalidOperation, IndexError, AttributeError):
|
except (ValueError, InvalidOperation, IndexError, AttributeError):
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
# Heuristic extractors for vest-release emails. Labels observed in public
|
|
||||||
# Schwab RSU release samples; real fixture needed to tighten these.
|
|
||||||
_VEST_DATE_RE = re.compile(
|
|
||||||
r"(?:Release Date|Vest Date|Vesting Date)\s*[:<][^0-9]*"
|
|
||||||
r"(\d{1,2}[\s/\-][A-Za-z]{3}[\s/\-]\d{2,4}|\d{2}/\d{2}/\d{4}|\d{4}-\d{2}-\d{2})",
|
|
||||||
re.IGNORECASE)
|
|
||||||
_VEST_TICKER_RE = re.compile(r"(?:Ticker|Symbol)\s*[:<]\s*([A-Z]{2,5})",
|
|
||||||
re.IGNORECASE)
|
|
||||||
_VEST_SHARES_RELEASED_RE = re.compile(
|
|
||||||
r"(?:Shares Released|Total Shares (?:Released|Vested))\s*[:<]\s*"
|
|
||||||
r"([\d,]+(?:\.\d+)?)",
|
|
||||||
re.IGNORECASE)
|
|
||||||
_VEST_SHARES_WITHHELD_RE = re.compile(
|
|
||||||
r"(?:Shares (?:Withheld|Sold)(?: for Taxes)?)\s*[:<]\s*"
|
|
||||||
r"([\d,]+(?:\.\d+)?)",
|
|
||||||
re.IGNORECASE)
|
|
||||||
_VEST_FMV_RE = re.compile(
|
|
||||||
r"(?:Market Price|FMV|Fair Market Value)\s*[:<]\s*"
|
|
||||||
r"\$?\s*([\d,]+(?:\.\d+)?)",
|
|
||||||
re.IGNORECASE)
|
|
||||||
_VEST_TAX_USD_RE = re.compile(
|
|
||||||
r"(?:Tax Withholding Amount|Total Tax Withholding|Tax Withheld)\s*[:<]\s*"
|
|
||||||
r"\$?\s*([\d,]+(?:\.\d+)?)",
|
|
||||||
re.IGNORECASE)
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_vest_release(raw_html: str) -> VestParseResult | None:
|
|
||||||
"""Best-effort extraction from a Schwab Release Confirmation email.
|
|
||||||
|
|
||||||
Runs label regexes on the plain-text view of the HTML. Returns None
|
|
||||||
(signalling fall-through) if the core four fields (date, ticker,
|
|
||||||
shares released, FMV) don't all resolve — that's a strong signal the
|
|
||||||
heuristics need a real fixture before they can be trusted on a live
|
|
||||||
email.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
soup = BeautifulSoup(raw_html, "html.parser")
|
|
||||||
text = soup.get_text(" ", strip=True)
|
|
||||||
except Exception:
|
|
||||||
return None
|
|
||||||
|
|
||||||
date_str = _search_group(_VEST_DATE_RE, text)
|
|
||||||
ticker = _search_group(_VEST_TICKER_RE, text)
|
|
||||||
shares_released_str = _search_group(_VEST_SHARES_RELEASED_RE, text)
|
|
||||||
fmv_str = _search_group(_VEST_FMV_RE, text)
|
|
||||||
if not (date_str and ticker and shares_released_str and fmv_str):
|
|
||||||
return None
|
|
||||||
|
|
||||||
try:
|
|
||||||
vest_date = dateparser.parse(date_str)
|
|
||||||
shares_vested = Decimal(shares_released_str.replace(",", ""))
|
|
||||||
fmv = Decimal(fmv_str.replace(",", ""))
|
|
||||||
except (ValueError, InvalidOperation):
|
|
||||||
return None
|
|
||||||
|
|
||||||
shares_sold_str = _search_group(_VEST_SHARES_WITHHELD_RE, text)
|
|
||||||
shares_sold_to_cover = (Decimal(shares_sold_str.replace(",", ""))
|
|
||||||
if shares_sold_str else None)
|
|
||||||
tax_usd_str = _search_group(_VEST_TAX_USD_RE, text)
|
|
||||||
tax_withheld_usd = (Decimal(tax_usd_str.replace(",", ""))
|
|
||||||
if tax_usd_str else None)
|
|
||||||
|
|
||||||
external_id = (f"schwab:{vest_date.date().isoformat()}:{ticker}:VEST:"
|
|
||||||
f"{shares_vested}")
|
|
||||||
|
|
||||||
vest_event = VestEvent(
|
|
||||||
external_id=external_id,
|
|
||||||
vest_date=vest_date,
|
|
||||||
ticker=ticker,
|
|
||||||
shares_vested=shares_vested,
|
|
||||||
shares_sold_to_cover=shares_sold_to_cover,
|
|
||||||
fmv_at_vest_usd=fmv,
|
|
||||||
tax_withheld_usd=tax_withheld_usd,
|
|
||||||
source="schwab_email",
|
|
||||||
raw={
|
|
||||||
"date": date_str,
|
|
||||||
"ticker": ticker,
|
|
||||||
"shares_released": shares_released_str,
|
|
||||||
"fmv": fmv_str,
|
|
||||||
"shares_withheld": shares_sold_str or "",
|
|
||||||
"tax_withheld": tax_usd_str or "",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
# Sibling Activities for Wealthfolio: full vest as BUY, sell-to-cover
|
|
||||||
# slice as SELL, both at the same FMV so net cash = 0 on that day.
|
|
||||||
activities: list[Activity] = [
|
|
||||||
Activity(
|
|
||||||
external_id=f"{external_id}:BUY",
|
|
||||||
account_id=_ACCOUNT_ID,
|
|
||||||
account_type=AccountType.GIA,
|
|
||||||
date=vest_date,
|
|
||||||
activity_type=ActivityType.BUY,
|
|
||||||
symbol=ticker,
|
|
||||||
quantity=shares_vested,
|
|
||||||
unit_price=fmv,
|
|
||||||
currency=_DEFAULT_CURRENCY,
|
|
||||||
notes="schwab-vest-release",
|
|
||||||
)
|
|
||||||
]
|
|
||||||
if shares_sold_to_cover is not None and shares_sold_to_cover > 0:
|
|
||||||
activities.append(
|
|
||||||
Activity(
|
|
||||||
external_id=f"{external_id}:SELL_TO_COVER",
|
|
||||||
account_id=_ACCOUNT_ID,
|
|
||||||
account_type=AccountType.GIA,
|
|
||||||
date=vest_date,
|
|
||||||
activity_type=ActivityType.SELL,
|
|
||||||
symbol=ticker,
|
|
||||||
quantity=shares_sold_to_cover,
|
|
||||||
unit_price=fmv,
|
|
||||||
currency=_DEFAULT_CURRENCY,
|
|
||||||
notes="schwab-sell-to-cover",
|
|
||||||
))
|
|
||||||
|
|
||||||
return VestParseResult(activities=activities, vest_event=vest_event)
|
|
||||||
|
|
||||||
|
|
||||||
def _search_group(pattern: re.Pattern[str], text: str) -> str | None:
|
|
||||||
m = pattern.search(text)
|
|
||||||
return m.group(1).strip() if m else None
|
|
||||||
|
|
|
||||||
|
|
@ -82,59 +82,3 @@ def test_price_with_commas_parses() -> None:
|
||||||
html = _SELL.replace("$612.34", "$1,612.34")
|
html = _SELL.replace("$612.34", "$1,612.34")
|
||||||
a = parse_schwab_email(html)[0]
|
a = parse_schwab_email(html)[0]
|
||||||
assert a.unit_price == Decimal("1612.34")
|
assert a.unit_price == Decimal("1612.34")
|
||||||
|
|
||||||
|
|
||||||
# --- Vest-release parsing -------------------------------------------------
|
|
||||||
|
|
||||||
_VEST_RELEASE = """<html><body>
|
|
||||||
<h2>Release Confirmation</h2>
|
|
||||||
<p>
|
|
||||||
Release Date: 15 Mar 2026
|
|
||||||
Ticker: META
|
|
||||||
Total Shares Released: 100.0
|
|
||||||
Market Price: $612.34
|
|
||||||
Shares Withheld for Taxes: 45
|
|
||||||
Tax Withholding Amount: $27,555.30
|
|
||||||
</p>
|
|
||||||
</body></html>"""
|
|
||||||
|
|
||||||
|
|
||||||
def test_vest_release_returns_two_activities_and_vest_event() -> None:
|
|
||||||
"""Release Confirmation yields a BUY (full vest) + SELL (sell-to-cover) + VestEvent."""
|
|
||||||
from broker_sync.providers.parsers.schwab import parse_schwab_email_full
|
|
||||||
|
|
||||||
result = parse_schwab_email_full(_VEST_RELEASE)
|
|
||||||
assert result.vest_event is not None
|
|
||||||
assert result.vest_event.ticker == "META"
|
|
||||||
assert result.vest_event.shares_vested == Decimal("100.0")
|
|
||||||
assert result.vest_event.shares_sold_to_cover == Decimal("45")
|
|
||||||
assert result.vest_event.fmv_at_vest_usd == Decimal("612.34")
|
|
||||||
assert result.vest_event.tax_withheld_usd == Decimal("27555.30")
|
|
||||||
assert result.vest_event.vest_date.date().isoformat() == "2026-03-15"
|
|
||||||
assert result.vest_event.external_id.startswith("schwab:2026-03-15:META:VEST:")
|
|
||||||
|
|
||||||
assert len(result.activities) == 2
|
|
||||||
buy = result.activities[0]
|
|
||||||
assert buy.activity_type is ActivityType.BUY
|
|
||||||
assert buy.quantity == Decimal("100.0")
|
|
||||||
sell = result.activities[1]
|
|
||||||
assert sell.activity_type is ActivityType.SELL
|
|
||||||
assert sell.quantity == Decimal("45")
|
|
||||||
assert sell.unit_price == Decimal("612.34")
|
|
||||||
|
|
||||||
|
|
||||||
def test_vest_email_with_unparseable_body_returns_empty() -> None:
|
|
||||||
"""Subject says Release Confirmation but fields missing → empty result, no crash."""
|
|
||||||
from broker_sync.providers.parsers.schwab import parse_schwab_email_full
|
|
||||||
|
|
||||||
html = "<html><body>Release Confirmation — please contact support</body></html>"
|
|
||||||
result = parse_schwab_email_full(html)
|
|
||||||
assert result.vest_event is None
|
|
||||||
assert result.activities == []
|
|
||||||
|
|
||||||
|
|
||||||
def test_back_compat_parse_schwab_email_drops_vest_event() -> None:
|
|
||||||
"""The legacy list[Activity] shape remains stable for existing callers."""
|
|
||||||
acts = parse_schwab_email(_VEST_RELEASE)
|
|
||||||
assert len(acts) == 2
|
|
||||||
assert all(isinstance(a.activity_type, ActivityType) for a in acts)
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue