parsers/schwab: emit paired BUY for recent SELL (vest synthesis)
Some checks are pending
CI / test (push) Waiting to run
CI / build (push) Blocked by required conditions
CI / deploy (push) Blocked by required conditions
ci/woodpecker/push/build Pipeline was successful

Schwab Stock Plan Services doesn't email vest-release confirmations to
the employee inbox — only the same-day-sell trade-executed alert lands.
The vest itself was invisible to broker-sync, so the META cadence
panel in the wealth dashboard has been missing the May 2026 vest BUY
and would keep missing every future vest.

Synthesis: when a SELL email's trade date is on/after the configured
boundary (default 2026-04-01), also emit a paired BUY with identical
date/qty/price/symbol. Notes link the pair via the SELL's external_id.
Verified true across 14 historical vests — 100% same-day-sell pattern,
SELL qty == vest qty.

Boundary stops the synthesis from back-filling vests prior to 2026-04
which already have csv-sourced BUY rows in Wealthfolio from the
historical one-shot backfill (last vest 2026-02-18). The csv BUYs and
inferred BUYs have distinct external_ids, so re-running against old
emails would double-count without this guard. Override via env var
`SCHWAB_VEST_INFER_FROM_DATE=yyyy-mm-dd` on the broker-sync-imap cron.

Tests: 4 new cases — recent SELL pairs, old SELL doesn't pair, env
override works, BUY-direction emails (rare) don't get paired.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-05-27 10:02:07 +00:00
parent abf9fa7cb5
commit 17c2a69c6c
2 changed files with 134 additions and 21 deletions

View file

@ -7,9 +7,16 @@ cells holding date / direction / quantity / ticker / price.
It does NOT email vest-release / Release Confirmation messages to the It does NOT email vest-release / Release Confirmation messages to the
employee address for this account (verified against 4 years of inbox employee address for this account (verified against 4 years of inbox
history, 2022-2026 see infra/docs in code-fqgr). Vest data must come history, 2022-2026). The vest itself is invisible to IMAP.
from the META payslip via payslip-ingest, not from email. The whole
vest-release parser that used to live here was dead code. Same-day-sell synthesis: Meta RSUs vest and are sold the same day at
the same FMV (verified across 14 historical vests). When a SELL email
is parsed AND its trade date is on or after `VEST_INFER_FROM_DATE`,
we ALSO emit a paired BUY representing the underlying vest event —
same date, same quantity, same price. The date boundary stops this
from back-filling historical vests that already have csv-sourced BUY rows
in Wealthfolio (which would duplicate at chart-level despite distinct
external_ids).
On any parse failure we return an empty list an unparseable email On any parse failure we return an empty list an unparseable email
shouldn't crash the IMAP batch. shouldn't crash the IMAP batch.
@ -17,6 +24,8 @@ shouldn't crash the IMAP batch.
from __future__ import annotations from __future__ import annotations
import logging import logging
import os
from datetime import date, datetime
from decimal import Decimal, InvalidOperation from decimal import Decimal, InvalidOperation
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
@ -29,9 +38,34 @@ log = logging.getLogger(__name__)
_ACCOUNT_ID = "schwab-workplace" _ACCOUNT_ID = "schwab-workplace"
_DEFAULT_CURRENCY = "USD" _DEFAULT_CURRENCY = "USD"
# Inferred-BUY synthesis boundary. SELL emails on or after this date
# emit a paired BUY for the underlying vest; earlier ones do not (they
# already have csv-sourced BUYs in Wealthfolio from the one-shot
# historical backfill, last vest 2026-02-18). Override at runtime with
# the env var if a different cutover is needed. ISO-8601 yyyy-mm-dd.
_DEFAULT_VEST_INFER_FROM = "2026-04-01"
def _vest_infer_from() -> date:
    """Return the first trade date for which a paired vest BUY is inferred.

    The value is read from the ``SCHWAB_VEST_INFER_FROM_DATE`` environment
    variable on every call, falling back to ``_DEFAULT_VEST_INFER_FROM``
    when the variable is unset. Surrounding whitespace is ignored.

    A value that does not parse as ``yyyy-mm-dd`` is not fatal: a warning
    is logged and the built-in default is used instead.
    """
    iso_day = "%Y-%m-%d"
    configured = os.environ.get(
        "SCHWAB_VEST_INFER_FROM_DATE", _DEFAULT_VEST_INFER_FROM
    ).strip()
    try:
        boundary = datetime.strptime(configured, iso_day)
    except ValueError:
        log.warning(
            "SCHWAB_VEST_INFER_FROM_DATE=%r is not yyyy-mm-dd; using default %s",
            configured, _DEFAULT_VEST_INFER_FROM,
        )
        boundary = datetime.strptime(_DEFAULT_VEST_INFER_FROM, iso_day)
    return boundary.date()
def parse_schwab_email(raw_html: str) -> list[Activity]: def parse_schwab_email(raw_html: str) -> list[Activity]:
"""Return a one-element list of Activity on success, empty on failure.""" """Return Activities for a Schwab trade-executed email.
Returns: empty list on parse failure; one Activity for a BUY-direction
email (rare — the workplace account is essentially sell-only); for a
SELL email, returns [SELL] plus an inferred paired BUY (=vest event)
when the trade date is on or after the synthesis-boundary date.
"""
try: try:
soup = BeautifulSoup(raw_html, "html.parser") soup = BeautifulSoup(raw_html, "html.parser")
cells = [ cells = [
@ -52,22 +86,40 @@ def parse_schwab_email(raw_html: str) -> list[Activity]:
for sign in ("$", "£", "", "USD", "GBP", "EUR"): for sign in ("$", "£", "", "USD", "GBP", "EUR"):
price_clean = price_clean.replace(sign, "") price_clean = price_clean.replace(sign, "")
unit_price = Decimal(price_clean.replace(",", "").strip()) unit_price = Decimal(price_clean.replace(",", "").strip())
ticker_clean = ticker.strip()
external_id = (f"schwab:{trade_date.date().isoformat()}:{ticker}:" external_id = (f"schwab:{trade_date.date().isoformat()}:{ticker_clean}:"
f"{direction.value}:{quantity}") f"{direction.value}:{quantity}")
return [ primary = Activity(
Activity( external_id=external_id,
external_id=external_id, account_id=_ACCOUNT_ID,
account_id=_ACCOUNT_ID, account_type=AccountType.GIA,
account_type=AccountType.GIA, date=trade_date,
date=trade_date, activity_type=direction,
activity_type=direction, symbol=ticker_clean,
symbol=ticker.strip(), quantity=quantity,
quantity=quantity, unit_price=unit_price,
unit_price=unit_price, currency=_DEFAULT_CURRENCY,
currency=_DEFAULT_CURRENCY, notes=f"schwab-email:{direction_txt}",
notes=f"schwab-email:{direction_txt}", )
)
] if direction is not ActivityType.SELL or trade_date.date() < _vest_infer_from():
return [primary]
inferred_buy = Activity(
external_id=(f"schwab:vest:{trade_date.date().isoformat()}:"
f"{ticker_clean}:BUY:{quantity}"),
account_id=_ACCOUNT_ID,
account_type=AccountType.GIA,
date=trade_date,
activity_type=ActivityType.BUY,
symbol=ticker_clean,
quantity=quantity,
unit_price=unit_price,
currency=_DEFAULT_CURRENCY,
notes=(f"schwab-vest-inferred-from-same-day-sell | "
f"paired_sell_external_id={external_id}"),
)
return [inferred_buy, primary]
except (ValueError, InvalidOperation, IndexError, AttributeError): except (ValueError, InvalidOperation, IndexError, AttributeError):
return [] return []

View file

@ -80,5 +80,66 @@ def test_external_id_is_stable_across_reruns() -> None:
def test_price_with_commas_parses() -> None:
    """A thousands separator in the price cell does not break parsing."""
    html = _SELL.replace("$612.34", "$1,612.34")
    # Pick the SELL by activity type rather than by list position: when the
    # parser also emits an inferred vest BUY, that BUY is returned first, so
    # index 0 is only the SELL for emails that do not trigger the pairing.
    acts = parse_schwab_email(html)
    sell = next(a for a in acts if a.activity_type is ActivityType.SELL)
    assert sell.unit_price == Decimal("1612.34")
# --- Inferred vest BUY ---------------------------------------------------
def _recent_sell(date_iso: str = "2026-05-19", qty: str = "55", price: str = "609.35") -> str:
    """Build a minimal Schwab "Sold" email body for META.

    The table carries five right-aligned ``dark-background-body`` cells, one
    per row, in this order: trade date, direction, quantity, ticker, and the
    price prefixed with ``$``.
    """
    cell_values = (date_iso, "Sold", qty, "META", f"${price}")
    rows = "\n".join(
        f'<tr><td class="dark-background-body" align="right">{value}</td></tr>'
        for value in cell_values
    )
    return f"\n<html><body><table>\n{rows}\n</table></body></html>\n"
def test_recent_sell_emits_paired_buy() -> None:
    """SELL dated on/after the synthesis boundary triggers a paired BUY.

    The boundary env var is removed for the duration of the parse so the
    test exercises the built-in default regardless of what the developer
    or CI shell exports.
    """
    import os
    from unittest import mock

    # patch.dict snapshots os.environ and restores it on exit, so popping
    # the override here cannot leak into other tests.
    with mock.patch.dict(os.environ):
        os.environ.pop("SCHWAB_VEST_INFER_FROM_DATE", None)
        acts = parse_schwab_email(_recent_sell())

    assert len(acts) == 2
    buy = next(a for a in acts if a.activity_type is ActivityType.BUY)
    sell = next(a for a in acts if a.activity_type is ActivityType.SELL)
    # The inferred BUY mirrors the SELL on every economic field.
    assert buy.quantity == sell.quantity == Decimal("55")
    assert buy.unit_price == sell.unit_price == Decimal("609.35")
    assert buy.date == sell.date
    assert buy.symbol == sell.symbol == "META"
    assert "schwab-vest-inferred-from-same-day-sell" in (buy.notes or "")
    # Distinct external ids keep the pair from colliding on import.
    assert buy.external_id == "schwab:vest:2026-05-19:META:BUY:55"
    assert sell.external_id == "schwab:2026-05-19:META:SELL:55"
def test_old_sell_emits_only_sell() -> None:
    """SELL dated before 2026-04-01 (default boundary) skips the paired BUY —
    those vests already have csv-sourced BUY rows in Wealthfolio.

    The boundary env var is removed for the duration of the parse; without
    that, an exported override earlier than the email date would make this
    default-boundary check fail for reasons unrelated to the parser.
    """
    import os
    from unittest import mock

    # patch.dict snapshots os.environ and restores it on exit.
    with mock.patch.dict(os.environ):
        os.environ.pop("SCHWAB_VEST_INFER_FROM_DATE", None)
        acts = parse_schwab_email(_recent_sell(date_iso="2025-08-19"))

    assert len(acts) == 1
    assert acts[0].activity_type is ActivityType.SELL
def test_boundary_env_var_overrides(monkeypatch: object) -> None:
    """The synthesis boundary is configurable via env var.

    ``monkeypatch`` is requested but not used; the parameter is kept so the
    test's signature is unchanged.
    """
    import os
    from unittest import mock

    # patch.dict restores os.environ to its prior state on exit. Deleting
    # the key by hand would instead discard a value that was already
    # exported before the test ran.
    with mock.patch.dict(os.environ, {"SCHWAB_VEST_INFER_FROM_DATE": "2025-01-01"}):
        acts = parse_schwab_email(_recent_sell(date_iso="2025-08-19"))

    assert len(acts) == 2  # now in scope
def test_buy_email_does_not_emit_inferred_buy() -> None:
    """A BUY-direction email dated after the boundary still yields one BUY.

    Only SELL emails are paired with an inferred vest, so moving the BUY
    fixture's date forward must not add a second activity.
    """
    redated_buy = _BUY.replace("2024-11-15", "2026-05-15")
    emitted = parse_schwab_email(redated_buy)
    assert len(emitted) == 1
    assert emitted[0].activity_type is ActivityType.BUY