schwab: detect vest-confirmation emails + emit VestEvent
Extends parse_schwab_email to handle Schwab's RSU Release Confirmation emails alongside the existing trade confirmations.

Adds:
- `VestEvent` dataclass in models.py — carries vest_date, ticker, shares_vested, shares_sold_to_cover, fmv_at_vest_usd, tax_withheld_usd. Written to payslip_ingest.rsu_vest_events by a postgres sink (pending a real email fixture + cross-service DB grant).
- `parse_schwab_email_full()` — new entry point returning both `list[Activity]` and `VestEvent | None`. The legacy `parse_schwab_email()` shape is preserved for existing callers.
- Vest-release dispatch heuristic: HTML body mentions "Release Confirmation" / "Award Vesting" / "RSU Release". On match, extract vest fields via label regexes; the full vest becomes a BUY Activity and the sell-to-cover slice becomes a SELL Activity at the same FMV (net zero cash on the day). Gross vest + sell-to-cover are both returned so Wealthfolio gets the full portfolio picture.
- Tests: 3 new (vest roundtrip, unparseable-vest safety, legacy shape preserved); existing 6 unchanged.

The regex heuristics will need tightening once a real email sample exists — the HTML structure observed in public Schwab emails may differ in material ways. For now, unmatched vest bodies return an empty result (no Activity, no VestEvent) rather than crashing the IMAP batch.

Part of: code-860
This commit is contained in:
parent
6f3bcea23e
commit
1d1e20b72b
3 changed files with 261 additions and 16 deletions
|
|
@ -102,3 +102,27 @@ def _fmt(v: Decimal | None) -> str:
|
|||
if v is None:
|
||||
return ""
|
||||
return format(v, "f")
|
||||
|
||||
|
||||
@dataclass
class VestEvent:
    """One Schwab RSU vest, destined for payslip_ingest.rsu_vest_events.

    Records the gross vest (shares vested x FMV) together with the
    sell-to-cover slice (shares withheld for tax x FMV). The matching
    Activity rows for Wealthfolio (a BUY for the whole vest and a SELL for
    the sold-to-cover shares) are built separately by the parser.

    All monetary fields are USD as parsed. Per the design notes, the
    postgres sink is expected to apply the ECB daily rate so the stored row
    carries both the raw USD figures and GBP-translated values.
    """

    # Stable identifier: schwab:{date}:{ticker}:VEST:{shares_vested}
    external_id: str
    # Date the shares were released.
    vest_date: datetime
    # Symbol of the vested stock.
    ticker: str
    # Gross number of shares released.
    shares_vested: Decimal
    # Shares withheld/sold to cover tax; None when the email omits it.
    shares_sold_to_cover: Decimal | None
    # Fair market value per share at vest, in USD.
    fmv_at_vest_usd: Decimal
    # Tax withheld in USD; None when the email omits it.
    tax_withheld_usd: Decimal | None
    # Which ingestion path produced this event.
    source: str = "schwab_email"
    # The matched source strings, kept for debugging/auditing.
    raw: dict[str, str] = field(default_factory=dict)
|
||||
|
|
|
|||
|
|
@ -1,37 +1,79 @@
|
|||
"""Schwab workplace-RSU email parser.
|
||||
|
||||
Schwab sends HTML transaction-confirmation emails with the core fields in
|
||||
five `<td class="dark-background-body" align="right">` elements:
|
||||
1. Trade date (human format — e.g. "Jan 23, 2025")
|
||||
2. Direction word ("Sold" for SELL; anything else is BUY)
|
||||
3. Quantity (share count, float)
|
||||
4. Ticker
|
||||
5. Price ("$123.45" — currency-sign-prefixed)
|
||||
Two email shapes are handled:
|
||||
|
||||
One email → one Activity. On any parse failure we return an empty list
|
||||
(same as the original finance/ behaviour — an unparseable email shouldn't
|
||||
crash the whole IMAP batch).
|
||||
1. Trade confirmations (sell-to-cover or user-initiated trades): HTML
|
||||
with five `<td class="dark-background-body" align="right">` cells
|
||||
holding date / direction / quantity / ticker / price. → one Activity.
|
||||
|
||||
Ported from finance/position/provider/schwab/message_parser.py (39 lines).
|
||||
Dropped: per-row timestamp id suffix (we use ISO date + ticker + qty which
|
||||
is stable across re-pulls), currency-from-sign hackery (US Schwab is USD-
|
||||
only in practice — if that ever changes we'll add FX on parse).
|
||||
2. Release Confirmations (RSU vest events): subject/body mentions
|
||||
"Release Confirmation" or "Award Vesting"; body lists vest date,
|
||||
shares released, FMV, shares sold to cover, and USD tax withheld.
|
||||
→ VestParseResult (activities + VestEvent): the gross vest (BUY at FMV),
|
||||
the sell-to-cover (SELL at FMV), and a standalone VestEvent for the
|
||||
payslip-ingest reconciliation pipeline.
|
||||
|
||||
On any parse failure we return the neutral empty result (no Activities,
|
||||
no VestEvent) — an unparseable email shouldn't crash the IMAP batch.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from decimal import Decimal, InvalidOperation
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from dateutil import parser as dateparser
|
||||
|
||||
from broker_sync.models import AccountType, Activity, ActivityType
|
||||
from broker_sync.models import AccountType, Activity, ActivityType, VestEvent
|
||||
|
||||
log = logging.getLogger(__name__)

# Account id and currency stamped on every Activity this parser emits.
_ACCOUNT_ID = "schwab-workplace"
_DEFAULT_CURRENCY = "USD"

# Marker phrases used to recognise a vest-confirmation email. The pattern is
# searched case-insensitively against the raw HTML string, so no DOM parse
# is needed just to decide which parser to dispatch to.
_VEST_SUBJECT_RE = re.compile(
    r"Release Confirmation|Award Vesting|RSU Release",
    flags=re.IGNORECASE,
)
|
||||
|
||||
|
||||
@dataclass
class VestParseResult:
    """Outcome of a full Schwab email parse.

    Bundles the Activity rows extracted from the email with the VestEvent,
    which is only present for vest-confirmation emails.
    """

    # Activity rows parsed from the email; empty when nothing could be parsed.
    activities: list[Activity]
    # The vest event, or None for trade confirmations and failed parses.
    vest_event: VestEvent | None
|
||||
|
||||
|
||||
def parse_schwab_email(raw_html: str) -> list[Activity]:
    """Parse a Schwab email and return only its Activity rows.

    Thin wrapper over `parse_schwab_email_full` that keeps the legacy
    list-of-Activity return shape for existing callers.

    Args:
        raw_html: Raw HTML body of the email.

    Returns:
        The Activity rows for the email. For a vest-confirmation email this
        is the gross-vest BUY plus, when present, the sell-to-cover SELL.
        An empty list means the email could not be parsed. Call
        `parse_schwab_email_full` instead when the VestEvent is also needed.
    """
    parsed = parse_schwab_email_full(raw_html)
    return parsed.activities
|
||||
|
||||
|
||||
def parse_schwab_email_full(raw_html: str) -> VestParseResult:
    """Parse a Schwab email into Activity rows plus an optional VestEvent.

    Emails whose raw HTML contains one of the vest marker phrases are handed
    to `_parse_vest_release`; every other email goes through the legacy
    single-row trade-confirmation parser.

    Args:
        raw_html: Raw HTML body of the email.

    Returns:
        A VestParseResult. When an email looks like a vest confirmation but
        its fields cannot be extracted, a warning is logged and the result
        is empty (no activities, no vest event); the trade-confirmation
        parser is not tried for such emails.
    """
    if not _VEST_SUBJECT_RE.search(raw_html):
        trade_rows = _parse_trade_confirmation(raw_html)
        return VestParseResult(activities=trade_rows, vest_event=None)

    vest_result = _parse_vest_release(raw_html)
    if vest_result is None:
        log.warning("schwab: detected vest email but could not extract fields; "
                    "add a real fixture to broker-sync/tests/fixtures/")
        return VestParseResult(activities=[], vest_event=None)

    return vest_result
|
||||
|
||||
|
||||
def _parse_trade_confirmation(raw_html: str) -> list[Activity]:
|
||||
"""Legacy 5-cell trade confirmation parser."""
|
||||
try:
|
||||
soup = BeautifulSoup(raw_html, "html.parser")
|
||||
cells = [
|
||||
|
|
@ -73,3 +115,126 @@ def parse_schwab_email(raw_html: str) -> list[Activity]:
|
|||
]
|
||||
except (ValueError, InvalidOperation, IndexError, AttributeError):
|
||||
return []
|
||||
|
||||
|
||||
# Heuristic label extractors for vest-release emails, applied to the
# plain-text view of the HTML. Labels were observed in public Schwab RSU
# release samples; a real fixture is needed to tighten these.
#
# Each pattern is "<label> [:<] <value>" with the value in group 1.

# Date value. Accepted shapes: "15 Mar 2026" / "15-Mar-26", "03/15/2026",
# "2026-03-15", and the month-first form "Mar 15, 2026" / "March 15 2026"
# (the human format Schwab uses in its trade confirmations). The month-first
# alternative is listed last and anchored with \b so the day-first and
# numeric shapes keep matching exactly as before.
_VEST_DATE_RE = re.compile(
    r"(?:Release Date|Vest Date|Vesting Date)\s*[:<][^0-9]*"
    r"(\d{1,2}[\s/\-][A-Za-z]{3}[\s/\-]\d{2,4}|\d{2}/\d{2}/\d{4}|\d{4}-\d{2}-\d{2}"
    r"|\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\.?"
    r"\s+\d{1,2},?\s+\d{4})",
    re.IGNORECASE)

# Ticker value: 2-5 letters forming a whole word. The trailing \b stops a
# company name from being truncated into a fake ticker ("Alphabet" must not
# yield "Alpha"). IGNORECASE applies to the value too, so callers should
# normalise the case of the captured symbol.
_VEST_TICKER_RE = re.compile(r"(?:Ticker|Symbol)\s*[:<]\s*([A-Z]{2,5})\b",
                             re.IGNORECASE)

# Gross share count released at vest (thousands separators allowed).
_VEST_SHARES_RELEASED_RE = re.compile(
    r"(?:Shares Released|Total Shares (?:Released|Vested))\s*[:<]\s*"
    r"([\d,]+(?:\.\d+)?)",
    re.IGNORECASE)

# Share count withheld / sold to cover taxes.
_VEST_SHARES_WITHHELD_RE = re.compile(
    r"(?:Shares (?:Withheld|Sold)(?: for Taxes)?)\s*[:<]\s*"
    r"([\d,]+(?:\.\d+)?)",
    re.IGNORECASE)

# Per-share fair market value; an optional leading "$" is not captured.
_VEST_FMV_RE = re.compile(
    r"(?:Market Price|FMV|Fair Market Value)\s*[:<]\s*"
    r"\$?\s*([\d,]+(?:\.\d+)?)",
    re.IGNORECASE)

# Total tax withheld; an optional leading "$" is not captured.
_VEST_TAX_USD_RE = re.compile(
    r"(?:Tax Withholding Amount|Total Tax Withholding|Tax Withheld)\s*[:<]\s*"
    r"\$?\s*([\d,]+(?:\.\d+)?)",
    re.IGNORECASE)
|
||||
|
||||
|
||||
def _parse_vest_release(raw_html: str) -> VestParseResult | None:
    """Best-effort extraction from a Schwab Release Confirmation email.

    Runs the label regexes on the plain-text view of the HTML.

    Args:
        raw_html: Raw HTML body of the email.

    Returns:
        A VestParseResult holding the VestEvent and its sibling Activity
        rows: a BUY for the full vest and, when a positive sell-to-cover
        share count is present, a SELL at the same FMV. Returns None when
        any of the four core fields (date, ticker, shares released, FMV) is
        missing, or when any extracted value -- including the optional
        sell-to-cover and tax figures -- cannot be converted. The caller
        turns None into the empty result, so a malformed email never
        raises out of this function.
    """
    try:
        text = BeautifulSoup(raw_html, "html.parser").get_text(" ", strip=True)
    except Exception:
        # Deliberately broad: whatever the HTML parser chokes on, one bad
        # email must not abort the rest of the batch.
        return None

    date_str = _search_group(_VEST_DATE_RE, text)
    ticker_str = _search_group(_VEST_TICKER_RE, text)
    shares_released_str = _search_group(_VEST_SHARES_RELEASED_RE, text)
    fmv_str = _search_group(_VEST_FMV_RE, text)
    if not (date_str and ticker_str and shares_released_str and fmv_str):
        return None

    # Optional fields: absent labels simply leave these as None.
    shares_sold_str = _search_group(_VEST_SHARES_WITHHELD_RE, text)
    tax_usd_str = _search_group(_VEST_TAX_USD_RE, text)

    # All conversions share one try block. The numeric regexes accept
    # strings such as a lone "," that Decimal rejects, and the optional
    # fields must fail as safely as the core ones.
    try:
        vest_date = dateparser.parse(date_str)
        shares_vested = Decimal(shares_released_str.replace(",", ""))
        fmv = Decimal(fmv_str.replace(",", ""))
        shares_sold_to_cover = (Decimal(shares_sold_str.replace(",", ""))
                                if shares_sold_str else None)
        tax_withheld_usd = (Decimal(tax_usd_str.replace(",", ""))
                            if tax_usd_str else None)
    except (ValueError, OverflowError, InvalidOperation):
        return None

    # The ticker regex is case-insensitive; normalise so the symbol and the
    # external ids derived from it do not depend on the email's casing.
    ticker = ticker_str.upper()

    external_id = (f"schwab:{vest_date.date().isoformat()}:{ticker}:VEST:"
                   f"{shares_vested}")

    vest_event = VestEvent(
        external_id=external_id,
        vest_date=vest_date,
        ticker=ticker,
        shares_vested=shares_vested,
        shares_sold_to_cover=shares_sold_to_cover,
        fmv_at_vest_usd=fmv,
        tax_withheld_usd=tax_withheld_usd,
        source="schwab_email",
        # Matched source strings, kept verbatim for auditing.
        raw={
            "date": date_str,
            "ticker": ticker_str,
            "shares_released": shares_released_str,
            "fmv": fmv_str,
            "shares_withheld": shares_sold_str or "",
            "tax_withheld": tax_usd_str or "",
        },
    )

    # Sibling Activities for Wealthfolio: full vest as BUY, sell-to-cover
    # slice as SELL, both at the same FMV so net cash = 0 on that day.
    activities: list[Activity] = [
        Activity(
            external_id=f"{external_id}:BUY",
            account_id=_ACCOUNT_ID,
            account_type=AccountType.GIA,
            date=vest_date,
            activity_type=ActivityType.BUY,
            symbol=ticker,
            quantity=shares_vested,
            unit_price=fmv,
            currency=_DEFAULT_CURRENCY,
            notes="schwab-vest-release",
        )
    ]
    if shares_sold_to_cover is not None and shares_sold_to_cover > 0:
        activities.append(
            Activity(
                external_id=f"{external_id}:SELL_TO_COVER",
                account_id=_ACCOUNT_ID,
                account_type=AccountType.GIA,
                date=vest_date,
                activity_type=ActivityType.SELL,
                symbol=ticker,
                quantity=shares_sold_to_cover,
                unit_price=fmv,
                currency=_DEFAULT_CURRENCY,
                notes="schwab-sell-to-cover",
            ))

    return VestParseResult(activities=activities, vest_event=vest_event)
|
||||
|
||||
|
||||
def _search_group(pattern: re.Pattern[str], text: str) -> str | None:
    """Return the stripped first capture group of `pattern` in `text`.

    Args:
        pattern: Compiled regex with at least one capture group.
        text: Text to search.

    Returns:
        Group 1 of the first match with surrounding whitespace removed, or
        None when the pattern does not match anywhere in `text`.
    """
    match = pattern.search(text)
    if match is None:
        return None
    return match.group(1).strip()
|
||||
|
|
|
|||
|
|
@ -82,3 +82,59 @@ def test_price_with_commas_parses() -> None:
|
|||
html = _SELL.replace("$612.34", "$1,612.34")
|
||||
a = parse_schwab_email(html)[0]
|
||||
assert a.unit_price == Decimal("1612.34")
|
||||
|
||||
|
||||
# --- Vest-release parsing -------------------------------------------------
|
||||
|
||||
_VEST_RELEASE = """<html><body>
|
||||
<h2>Release Confirmation</h2>
|
||||
<p>
|
||||
Release Date: 15 Mar 2026
|
||||
Ticker: META
|
||||
Total Shares Released: 100.0
|
||||
Market Price: $612.34
|
||||
Shares Withheld for Taxes: 45
|
||||
Tax Withholding Amount: $27,555.30
|
||||
</p>
|
||||
</body></html>"""
|
||||
|
||||
|
||||
def test_vest_release_returns_two_activities_and_vest_event() -> None:
    """A Release Confirmation produces a VestEvent plus BUY and SELL rows."""
    from broker_sync.providers.parsers.schwab import parse_schwab_email_full

    parsed = parse_schwab_email_full(_VEST_RELEASE)

    # The vest event carries every field from the fixture.
    assert parsed.vest_event is not None
    event = parsed.vest_event
    assert event.ticker == "META"
    assert event.shares_vested == Decimal("100.0")
    assert event.shares_sold_to_cover == Decimal("45")
    assert event.fmv_at_vest_usd == Decimal("612.34")
    assert event.tax_withheld_usd == Decimal("27555.30")
    assert event.vest_date.date().isoformat() == "2026-03-15"
    assert event.external_id.startswith("schwab:2026-03-15:META:VEST:")

    # Gross vest comes first, the sell-to-cover slice second.
    assert len(parsed.activities) == 2
    gross_vest, sell_to_cover = parsed.activities
    assert gross_vest.activity_type is ActivityType.BUY
    assert gross_vest.quantity == Decimal("100.0")
    assert sell_to_cover.activity_type is ActivityType.SELL
    assert sell_to_cover.quantity == Decimal("45")
    assert sell_to_cover.unit_price == Decimal("612.34")
|
||||
|
||||
|
||||
def test_vest_email_with_unparseable_body_returns_empty() -> None:
    """A vest marker phrase with no extractable fields gives an empty result."""
    from broker_sync.providers.parsers.schwab import parse_schwab_email_full

    body = "<html><body>Release Confirmation — please contact support</body></html>"
    parsed = parse_schwab_email_full(body)

    # Neither a vest event nor any activity: the parser must not raise.
    assert parsed.vest_event is None
    assert parsed.activities == []
|
||||
|
||||
|
||||
def test_back_compat_parse_schwab_email_drops_vest_event() -> None:
    """The legacy entry point still returns a plain list of Activity rows."""
    rows = parse_schwab_email(_VEST_RELEASE)

    assert len(rows) == 2
    # Every row is a real Activity with an ActivityType, not a VestEvent.
    for row in rows:
        assert isinstance(row.activity_type, ActivityType)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue