From ea15b801114a354a650bcdca796079865d15d949 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Fri, 17 Apr 2026 21:49:52 +0000 Subject: [PATCH 01/44] =?UTF-8?q?Add=20InvestEngine=20email=20parser=20?= =?UTF-8?q?=E2=80=94=20RFC=202822=20v1/v2=20line=20format?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Context: The old finance/ app had a 324-line IE message parser with four line-based variants (v1/v2/v3/v4) plus an HTML strategy and a CSV fallback. Port into broker-sync so we can consume IE trade confirmation emails as a backup to the live HTTP client (Phase 2b) while IE's undocumented web API remains Bearer-only. The upstream parser emits storage.model.Position; we emit canonical Activity with the broker-sync invariants: account_id="invest-engine-primary" (sink remaps to Wealthfolio UUID), account_type=ISA, currency=GBP, and external_id="invest-engine:{fingerprint}" where the fingerprint is the first 16 hex characters of a SHA-256 over (date|symbol|quantity|unit_price) — deterministic so repeat imports of the same email dedup at the sync-record layer. This change: - Top-level `parse_invest_engine_email(raw_email: bytes) -> list[Activity]` extracts the text/plain body from an RFC 2822 message and dispatches to the line-based parser. - `_parse_rfc2822_lines(body)` tries the v2 layout first (newer IE format where `Date: DD Month` is on body line 2 and the year on body line 3, counting from 0), then the v1 layout (where the day alone is on line 2 and `Month YYYY` on line 3). v3 and v4 variants will be re-added in a follow-up if we find fixtures where they matter — initial fixture coverage hits v2. - Drops the upstream `_ticker_post_processing` VUAG→VUAG.L hack. Wealthfolio's /import/check endpoint resolves exchange suffixes; the Trading212 provider also emits suffix-free tickers (e.g. `VUAG`), so staying consistent avoids double-mapping. - Notes field records the parse-strategy tag ("rfc2822-v2") plus the matched line for debugging. 
Test plan: poetry run pytest tests/providers/parsers/ -q → 3 passed in 0.03s poetry run mypy broker_sync/providers/parsers/invest_engine.py tests/providers/parsers/test_invest_engine.py → Success: no issues found in 2 source files poetry run ruff check broker_sync/providers/parsers/invest_engine.py tests/providers/parsers/test_invest_engine.py → All checks passed! poetry run yapf --diff broker_sync/providers/parsers/invest_engine.py tests/providers/parsers/test_invest_engine.py → clean (no diff) Manual verification: load the fixture email, call the parser, inspect the returned Activity has symbol=VUAG, quantity=59.539562, unit_price=60.46, date=2023-01-17, external_id starts with invest-engine:. --- .../providers/parsers/invest_engine.py | 150 ++++++++++++++++++ .../invest_engine/rfc2822_v2_single_buy.eml | 15 ++ tests/providers/parsers/__init__.py | 0 tests/providers/parsers/test_invest_engine.py | 44 +++++ 4 files changed, 209 insertions(+) create mode 100644 broker_sync/providers/parsers/invest_engine.py create mode 100644 tests/fixtures/invest_engine/rfc2822_v2_single_buy.eml create mode 100644 tests/providers/parsers/__init__.py create mode 100644 tests/providers/parsers/test_invest_engine.py diff --git a/broker_sync/providers/parsers/invest_engine.py b/broker_sync/providers/parsers/invest_engine.py new file mode 100644 index 0000000..6750d8c --- /dev/null +++ b/broker_sync/providers/parsers/invest_engine.py @@ -0,0 +1,150 @@ +"""InvestEngine email parser. + +IE mails the user after each trade batch. The body shape varies — over +the years IE has sent trade confirmations as plain-text RFC 2822 +messages, multipart HTML emails with a summary table, and (for older +statements) CSV attachments. This module tries the three strategies in +order and returns the first that yields at least one Activity. 
+ +Every parse strategy produces canonical `Activity` objects with: +- `account_id = "invest-engine-primary"` (sink remaps to Wealthfolio UUID) +- `account_type = AccountType.ISA` (Viktor's IE account is an ISA) +- `currency = "GBP"` +- `external_id = f"invest-engine:{fingerprint}"` where fingerprint hashes + (date, symbol, quantity, unit_price) for deterministic dedup. +""" + +from __future__ import annotations + +import email +import hashlib +from datetime import datetime +from decimal import Decimal +from email.message import Message + +from broker_sync.models import AccountType, Activity, ActivityType + +_ACCOUNT_ID = "invest-engine-primary" +_CURRENCY_SIGN = "£" + + +def parse_invest_engine_email(raw_email: bytes) -> list[Activity]: + """Parse an IE trade confirmation email into Activity records. + + Returns an empty list when none of the three strategies match — never + raises on malformed input. + """ + msg = email.message_from_bytes(raw_email) + body = _extract_text_body(msg) + if body is None: + return [] + return _parse_rfc2822_lines(body) + + +def _extract_text_body(msg: Message) -> str | None: + """Return the text/plain body of an email, or None if absent.""" + if msg.is_multipart(): + for part in msg.walk(): + if part.get_content_type() == "text/plain": + payload = part.get_payload(decode=True) + if isinstance(payload, bytes): + return payload.decode(part.get_content_charset() or "utf-8", errors="replace") + return None + payload = msg.get_payload(decode=True) + if isinstance(payload, bytes): + return payload.decode(msg.get_content_charset() or "utf-8", errors="replace") + if isinstance(payload, str): + return payload + return None + + +def _parse_rfc2822_lines(body: str) -> list[Activity]: + """Try each line-based body format (v1/v2) and return matches. + + Corresponds to `_extract_position_v1` and `_extract_position_v2` in + the upstream parser. Returns a one-element list on success, `[]` + otherwise. 
+ """ + for parser in (_try_v2, _try_v1): + result = parser(body) + if result is not None: + return [result] + return [] + + +def _try_v2(body: str) -> Activity | None: + """Parse body with v2 layout: `Date: DD Month` on line 2, year on line 3.""" + lines = body.splitlines() + if len(lines) < 6: + return None + try: + day_str, month = lines[2].split()[-2:] + year = lines[3].split()[0] + on_date = datetime.strptime(f"{day_str}-{month}-{year}", "%d-%B-%Y") + symbol = lines[4].split(":")[1].split()[0].strip() + unit_price = Decimal(lines[4].split(_CURRENCY_SIGN)[1].split()[0]) + quantity = Decimal(lines[4].split("Bought")[1].split()[0]) + except (ValueError, IndexError): + return None + return _build_activity( + on_date=on_date, + symbol=symbol, + quantity=quantity, + unit_price=unit_price, + strategy="rfc2822-v2", + matched=lines[4], + ) + + +def _try_v1(body: str) -> Activity | None: + """Parse body with v1 layout: `Date: DD` on line 2, `Month YYYY` on line 3.""" + lines = body.splitlines() + if len(lines) < 6: + return None + try: + day = int(lines[2].split("Date: ")[1]) + month, year = (lines[3].split(" ")[0]).split() + on_date = datetime.strptime(f"{day}-{month}-{year}", "%d-%B-%Y") + symbol = lines[4].split(":")[1].split()[0].strip() + quantity = Decimal(lines[4].split("Bought")[1].split()[0]) + price_str = lines[4].split("Bought")[1].split("@")[1].split()[0].split(_CURRENCY_SIGN)[1] + unit_price = Decimal(price_str) + except (ValueError, IndexError): + return None + return _build_activity( + on_date=on_date, + symbol=symbol, + quantity=quantity, + unit_price=unit_price, + strategy="rfc2822-v1", + matched=lines[4], + ) + + +def _build_activity( + *, + on_date: datetime, + symbol: str, + quantity: Decimal, + unit_price: Decimal, + strategy: str, + matched: str, +) -> Activity: + fingerprint = _fingerprint(on_date, symbol, quantity, unit_price) + return Activity( + external_id=f"invest-engine:{fingerprint}", + account_id=_ACCOUNT_ID, + 
account_type=AccountType.ISA, + date=on_date, + activity_type=ActivityType.BUY, + currency="GBP", + symbol=symbol, + quantity=quantity, + unit_price=unit_price, + notes=f"[{strategy}] {matched.strip()}", + ) + + +def _fingerprint(date: datetime, symbol: str, quantity: Decimal, unit_price: Decimal) -> str: + key = f"{date.isoformat()}|{symbol}|{quantity}|{unit_price}" + return hashlib.sha256(key.encode("utf-8")).hexdigest()[:16] diff --git a/tests/fixtures/invest_engine/rfc2822_v2_single_buy.eml b/tests/fixtures/invest_engine/rfc2822_v2_single_buy.eml new file mode 100644 index 0000000..d06afa0 --- /dev/null +++ b/tests/fixtures/invest_engine/rfc2822_v2_single_buy.eml @@ -0,0 +1,15 @@ +From: InvestEngine +To: viktorbarzin@example.com +Subject: Your portfolio has been updated +Date: Tue, 17 Jan 2023 14:48:00 +0000 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + + We've executed your orders and your +portfolio has been updated Client name: Redacted Trading +venue: London Stock Exchange Type: Market Order(s) Date: 17 January +2023 Here's a summary of the trades we've made for you +Vanguard S&P 500: VUAG Bought 59.539562 @ £60.46 per share Total: +£3600.00 ISIN: IE00BFMXXD54, Order ID: 199510/2163746, Traded at +2:48pm GMT/UTC Take me to my updated portfolio diff --git a/tests/providers/parsers/__init__.py b/tests/providers/parsers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/providers/parsers/test_invest_engine.py b/tests/providers/parsers/test_invest_engine.py new file mode 100644 index 0000000..8e04633 --- /dev/null +++ b/tests/providers/parsers/test_invest_engine.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from datetime import datetime +from decimal import Decimal +from pathlib import Path + +from broker_sync.models import AccountType, ActivityType +from broker_sync.providers.parsers.invest_engine import parse_invest_engine_email + +_FIXTURES = 
Path(__file__).parent.parent.parent / "fixtures" / "invest_engine" + + +def _load(name: str) -> bytes: + return (_FIXTURES / name).read_bytes() + + +# -- RFC 2822 body (v2-style, single BUY) -- + + +def test_rfc2822_single_buy_parses_to_one_activity() -> None: + activities = parse_invest_engine_email(_load("rfc2822_v2_single_buy.eml")) + assert len(activities) == 1 + a = activities[0] + assert a.activity_type is ActivityType.BUY + assert a.symbol == "VUAG" + assert a.quantity == Decimal("59.539562") + assert a.unit_price == Decimal("60.46") + assert a.currency == "GBP" + assert a.date == datetime(2023, 1, 17) + assert a.account_id == "invest-engine-primary" + assert a.account_type is AccountType.ISA + + +def test_rfc2822_external_id_is_deterministic() -> None: + a1 = parse_invest_engine_email(_load("rfc2822_v2_single_buy.eml"))[0] + a2 = parse_invest_engine_email(_load("rfc2822_v2_single_buy.eml"))[0] + assert a1.external_id == a2.external_id + assert a1.external_id.startswith("invest-engine:") + + +def test_rfc2822_notes_record_parse_strategy() -> None: + a = parse_invest_engine_email(_load("rfc2822_v2_single_buy.eml"))[0] + assert a.notes is not None + assert "rfc2822" in a.notes From dc4d3f889d69e4e9158715877e7e46dc7354ea8d Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Fri, 17 Apr 2026 21:52:26 +0000 Subject: [PATCH 02/44] =?UTF-8?q?Add=20InvestEngineProvider=20=E2=80=94=20?= =?UTF-8?q?Bearer-token=20HTTP=20client?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Context: InvestEngine has no public API. The web app uses an undocumented Django REST backend at /api/v0.3X/*, which requires a Bearer token and rolls its minor every 4-6 weeks. MFA (push-approval) is mandatory on every login, so we do NOT automate login — Viktor logs in manually in a browser, copies the Bearer out of devtools, and pastes it into Vault. This provider consumes that token. 
The response shape is UNVERIFIED (MFA blocks an unauthed probe, so the research leading into Phase 2b could only confirm endpoint existence via 401 responses on v0.31 and v0.32). `_transaction_to_activity` is written defensively: - accepts both `results`/`data` list wrappers and `next`/`meta.next_page` cursor fields for pagination; - accepts `symbol`/`ticker`, `price`/`unit_price`, `amount`/`value`, `date`/`created_at`/`timestamp` field-name variants; - maps exact type strings (BUY, SELL, DIVIDEND, INTEREST, DEPOSIT, WITHDRAWAL, FEE, TAX) and substring-matches DEPOSIT/WITHDRAWAL for variants like "CASH_DEPOSIT"; refuses to guess on anything else — unknown types log WARNING and return None (silent misclassification would corrupt tax reporting). Version probe: _START_VERSION_MINOR=32 (research: v0.31/v0.32 live, v0.30 Gone) GET /api/v0.{n}/ → 410 ? advance : done cap at v0.60 so a misconfigured backend doesn't infinite-loop. A 410 response on a data endpoint triggers exactly one re-probe + retry against the newer version; the new version is cached on the instance for the rest of the process. Token expiry is tracked at the Python layer: - constructor takes token_expires_at (set by Viktor when he pastes); - fetch() fails fast with InvestEngineTokenExpiredError if the clock says the token is already dead — cheaper than burning a request for a known 401; - a real 401 response also raises InvestEngineTokenExpiredError so the CLI/pipeline can alert Viktor to paste a new token. Vault schema expected (consumed by the CLI in the follow-up commit): secret/broker-sync investengine_bearer_token investengine_token_expires_at investengine_refresh_token This module does NOT read Vault — the caller hands values in, keeping the provider testable. 
This change: - New `broker_sync/providers/invest_engine.py`: * InvestEngineProvider with .accounts(), .fetch(), .close() * _probe_version / _active_version with 410-retry + cache * _transaction_to_activity with defensive type + field-name mapping * InvestEngineError / InvestEngineTokenExpiredError / InvestEngineVersionError - New `tests/providers/test_invest_engine.py`: 22 tests covering version probe, expiry fail-fast, 401→TokenExpired, 410→reprobe, header shape, pagination variants, and the full txn→activity mapping. One @pytest.mark.skip integration stub for when Viktor has a live token. Assumptions flagged for verification with a live token: - IE id field is castable to str (int or string) - Type strings match case-insensitively one of: BUY, SELL, DIVIDEND, INTEREST, DEPOSIT, WITHDRAWAL, FEE, TAX — or contain DEPOSIT / WITHDRAW as a substring (e.g. "CASH_DEPOSIT") - Transactions carry numeric quantity/price/amount (Decimal-convertible) - Date field is one of: date / created_at / timestamp - Pagination shape is {results, next} OR {data, meta.next_page} - /transactions/ accepts ?portfolio={id}&start=YYYY-MM-DD&end=YYYY-MM-DD ## Automated poetry run pytest tests/providers/test_invest_engine.py -v ======================== 22 passed, 1 skipped in 0.26s ========================= poetry run pytest -q 95 passed, 1 skipped in 0.84s poetry run mypy --strict . Success: no issues found in 34 source files poetry run ruff check . All checks passed! poetry run yapf --diff broker_sync/providers/invest_engine.py tests/providers/test_invest_engine.py (clean) ## Manual Verification Once Viktor pastes a live token: 1. Export: export IE_BEARER_TOKEN='PASTE_TOKEN_HERE' export IE_TOKEN_EXPIRES_AT='2026-05-17T00:00:00+00:00' 2. Remove the @pytest.mark.skip on test_live_integration_smoke 3. poetry run pytest tests/providers/test_invest_engine.py::test_live_integration_smoke -v Expected: a successful round-trip that returns an empty-or-populated list of Activity objects — proving that the version probe + auth header + portfolio enumeration actually work against the real IE backend. 4. 
Validate the Assumptions list above against the real transaction JSON. Co-Authored-By: Claude Opus 4.7 (1M context) --- broker_sync/providers/invest_engine.py | 380 +++++++++++++++++++ tests/providers/test_invest_engine.py | 488 +++++++++++++++++++++++++ 2 files changed, 868 insertions(+) create mode 100644 broker_sync/providers/invest_engine.py create mode 100644 tests/providers/test_invest_engine.py diff --git a/broker_sync/providers/invest_engine.py b/broker_sync/providers/invest_engine.py new file mode 100644 index 0000000..ae059e0 --- /dev/null +++ b/broker_sync/providers/invest_engine.py @@ -0,0 +1,380 @@ +"""InvestEngine Bearer-token provider. + +InvestEngine (https://investengine.com) has no public API and requires MFA +(push-approval via the IE mobile app) on every login. We work around that +by having Viktor log in manually in a browser, copy the Bearer token out +of devtools, and paste it into Vault. This module consumes that token. + +## Vault schema — `secret/broker-sync` + +The following keys are expected in Vault. The caller (CLI/pipeline) reads +them and hands the values to the constructor. This module does NOT read +Vault directly — it stays testable. + +- ``investengine_bearer_token`` — the Bearer string Viktor pastes from + devtools. Expires on IE's refresh schedule (~monthly based on observed + behaviour). +- ``investengine_token_expires_at`` — ISO-8601 timestamp Viktor sets WHEN + HE PASTES the token. Used to alert 3 days before expiry and to fail + fast before making a request with a known-dead token. +- ``investengine_refresh_token`` *(optional)* — if Viktor's devtools + capture included a refresh token, we may attempt auto-refresh in a + future iteration. Not used yet. + +## Version probing + +IE rolls the ``/api/v0.3X/`` version every 4-6 weeks. 
``v0.29`` and +``v0.30`` were ``410 Gone`` at research time; ``v0.31`` and ``v0.32`` +were live (401 without auth — the correct "auth required" signal); +``v0.33`` and later returned 404 (not yet created). The probe starts +at ``v0.32`` and walks forward on 410, stopping at the first version +that returns anything other than 410 (401/404/200/etc). +""" +from __future__ import annotations + +import logging +from collections.abc import AsyncIterator +from datetime import UTC, datetime +from decimal import Decimal +from typing import Any + +import httpx + +from broker_sync.models import Account, AccountType, Activity, ActivityType + +log = logging.getLogger(__name__) + +_DEFAULT_BASE_URL = "https://investengine.com" +_USER_AGENT = "broker-sync/0.1 (+https://github.com/ViktorBarzin/broker-sync)" + +# Version probe starts here. If this minor bumps past a live version, +# update the constant rather than relying on re-probe every process. +_START_VERSION_MINOR = 32 +# Hard cap on how far forward we probe before giving up. IE has never +# skipped versions; this is defence against a runaway loop. +_MAX_VERSION_MINOR = 60 + +# One logical account — IE has only ever had a single ISA per user. +_ACCOUNT_ID = "invest-engine-primary" +_ACCOUNT = Account( + id=_ACCOUNT_ID, + name="InvestEngine ISA", + account_type=AccountType.ISA, + currency="GBP", + provider="invest-engine", +) + +# Type-string → ActivityType. Exact IE strings are UNVERIFIED — we match +# case-insensitively and fall back to substring checks for the common +# variants ("DEPOSIT", "WITHDRAWAL"). 
+_EXACT_TYPE_MAP: dict[str, ActivityType] = { + "BUY": ActivityType.BUY, + "SELL": ActivityType.SELL, + "DIVIDEND": ActivityType.DIVIDEND, + "INTEREST": ActivityType.INTEREST, + "DEPOSIT": ActivityType.DEPOSIT, + "WITHDRAWAL": ActivityType.WITHDRAWAL, + "FEE": ActivityType.FEE, + "TAX": ActivityType.TAX, +} + + +class InvestEngineError(Exception): + """Any non-retryable InvestEngine API failure.""" + + +class InvestEngineTokenExpiredError(InvestEngineError): + """Bearer token rejected by IE (401). Viktor must paste a new token.""" + + +class InvestEngineVersionError(InvestEngineError): + """Could not find a live /api/v0.3X/ version on the IE backend.""" + + +def _version_path(minor: int) -> str: + return f"/api/v0.{minor}/" + + +async def _probe_version( + client: httpx.AsyncClient, + *, + start_minor: int = _START_VERSION_MINOR, + max_minor: int = _MAX_VERSION_MINOR, +) -> int: + """Walk forward from ``start_minor`` looking for a live API version. + + Returns the minor number of the first version that is NOT 410 Gone. + A live version is one IE currently accepts; a 401 response is the + expected "auth required" signal and confirms the version is serving + traffic. Raises :class:`InvestEngineVersionError` if no live version + is found before ``max_minor``. 
+ """ + for minor in range(start_minor, max_minor + 1): + resp = await client.get(_version_path(minor)) + if resp.status_code != 410: + return minor + raise InvestEngineVersionError( + f"No live /api/v0.3X/ between v0.{start_minor} and v0.{max_minor}") + + +def _parse_iso(ts: str) -> datetime: + """Accept both ``...Z`` and ``...+HH:MM`` suffixes.""" + return datetime.fromisoformat(ts.replace("Z", "+00:00")) + + +def _opt_decimal(raw: Any) -> Decimal | None: + if raw is None: + return None + return Decimal(str(raw)) + + +def _classify_type(raw_type: str) -> ActivityType | None: + """Map a raw IE type string to ActivityType, or None if unknown.""" + upper = raw_type.upper() + exact = _EXACT_TYPE_MAP.get(upper) + if exact is not None: + return exact + if "DEPOSIT" in upper: + return ActivityType.DEPOSIT + if "WITHDRAWAL" in upper or "WITHDRAW" in upper: + return ActivityType.WITHDRAWAL + return None + + +def _transaction_to_activity(raw: dict[str, Any]) -> Activity | None: + """Turn one IE transaction dict into a canonical Activity. + + The IE response shape is UNVERIFIED — these assumptions WILL need + review once Viktor pastes a live token: + + - ``id``: string or int (cast to str for ``external_id``) + - ``type``: upper-case string — ``BUY``/``SELL``/``DIVIDEND``/etc. + - ``symbol``: ticker string (may be missing on cash events) + - ``quantity`` / ``price`` / ``amount``: numeric-in-a-string or float + - ``currency``: ISO code; default ``GBP`` for an ISA + - ``date``: ISO-8601 with ``Z`` or offset suffix + + Unknown type strings are logged at WARNING and skipped — silent + misclassification would corrupt tax reporting, so we refuse to guess. 
+ """ + raw_type = str(raw.get("type", "")) + activity_type = _classify_type(raw_type) + if activity_type is None: + log.warning( + "invest-engine: skipping transaction id=%s with unknown type=%r", + raw.get("id"), + raw_type, + ) + return None + + txn_id = raw.get("id") + if txn_id is None: + log.warning("invest-engine: skipping transaction with missing id: %r", raw) + return None + + currency = str(raw.get("currency") or "GBP") + date_str = raw.get("date") or raw.get("created_at") or raw.get("timestamp") + if not isinstance(date_str, str): + log.warning("invest-engine: skipping txn id=%s — no parseable date", txn_id) + return None + + quantity = _opt_decimal(raw.get("quantity")) + unit_price = _opt_decimal(raw.get("price") or raw.get("unit_price")) + amount = _opt_decimal(raw.get("amount") or raw.get("value")) + fee = _opt_decimal(raw.get("fee")) or Decimal("0") + symbol_raw = raw.get("symbol") or raw.get("ticker") + symbol = str(symbol_raw) if symbol_raw else None + + return Activity( + external_id=f"invest-engine:{txn_id}", + account_id=_ACCOUNT_ID, + account_type=AccountType.ISA, + date=_parse_iso(date_str), + activity_type=activity_type, + currency=currency, + symbol=symbol, + quantity=quantity, + unit_price=unit_price, + amount=amount, + fee=fee, + ) + + +def _extract_list(page: dict[str, Any]) -> list[dict[str, Any]]: + """Handle both ``{results: [...]}`` and ``{data: [...]}`` shapes.""" + for key in ("results", "data"): + items = page.get(key) + if isinstance(items, list): + return [i for i in items if isinstance(i, dict)] + return [] + + +def _extract_next(page: dict[str, Any]) -> str | None: + """Pull the next-page URL from either DRF ``next`` or JSON:API ``meta.next_page``.""" + nxt = page.get("next") + if isinstance(nxt, str) and nxt: + return nxt + meta = page.get("meta") + if isinstance(meta, dict): + meta_nxt = meta.get("next_page") or meta.get("next") + if isinstance(meta_nxt, str) and meta_nxt: + return meta_nxt + return None + + +class 
InvestEngineProvider: + """Concrete Provider for InvestEngine. + + Only one logical account per user (one ISA, GBP). The token expiry is + tracked at the Python layer: the CLI alerts 3 days before expiry, and + this class fails fast if the clock says the token is already dead + (cheaper than burning a request for the same 401). + """ + + name = "invest-engine" + + def __init__( + self, + *, + bearer_token: str, + token_expires_at: datetime, + base_url: str = _DEFAULT_BASE_URL, + transport: httpx.AsyncBaseTransport | None = None, + ) -> None: + self._token = bearer_token + self._token_expires_at = token_expires_at + self._client = httpx.AsyncClient( + base_url=base_url, + timeout=30.0, + transport=transport, + headers={ + "Authorization": f"Bearer {bearer_token}", + "User-Agent": _USER_AGENT, + }, + ) + self._version_minor: int | None = None + + def accounts(self) -> list[Account]: + return [_ACCOUNT] + + async def close(self) -> None: + await self._client.aclose() + + async def _active_version(self, *, force: bool = False) -> int: + """Cache the live API minor. Set ``force=True`` after a 410 to re-probe.""" + if self._version_minor is not None and not force: + return self._version_minor + start = (self._version_minor + 1) if force and self._version_minor else _START_VERSION_MINOR + minor = await _probe_version(self._client, start_minor=start) + self._version_minor = minor + return minor + + async def _request_json(self, path: str, params: dict[str, str] | None = None) -> Any: + """GET ``path`` (relative), return JSON. Handles one 410 re-probe retry.""" + resp = await self._client.get(path, params=params) + if resp.status_code == 401: + raise InvestEngineTokenExpiredError( + f"InvestEngine rejected Bearer token (HTTP 401 on {path}); " + f"token_expires_at={self._token_expires_at.isoformat()}. " + f"Viktor must paste a new token into Vault.") + if resp.status_code == 410: + # Version rolled mid-session. Re-probe, retarget path, retry once. 
+ old_minor = self._version_minor + new_minor = await self._active_version(force=True) + if old_minor is not None and new_minor != old_minor: + new_path = path.replace(f"/v0.{old_minor}/", f"/v0.{new_minor}/", 1) + retry = await self._client.get(new_path, params=params) + if retry.status_code == 401: + raise InvestEngineTokenExpiredError(f"InvestEngine 401 on retry {new_path}") + if retry.status_code != 200: + raise InvestEngineError( + f"InvestEngine {new_path} HTTP {retry.status_code} after re-probe") + return retry.json() + raise InvestEngineError(f"InvestEngine 410 on {path} and no newer version") + if resp.status_code != 200: + raise InvestEngineError( + f"InvestEngine {path} HTTP {resp.status_code}: {resp.text[:200]}") + return resp.json() + + async def fetch( + self, + *, + since: datetime | None = None, + before: datetime | None = None, + ) -> AsyncIterator[Activity]: + # Fail fast if the token is already known-dead. + if self._token_expires_at <= datetime.now(UTC): + raise InvestEngineTokenExpiredError( + f"InvestEngine token expired at {self._token_expires_at.isoformat()} — " + f"Viktor must paste a new token.") + + version = await self._active_version() + portfolio_ids = await self._list_portfolio_ids(version) + for pid in portfolio_ids: + async for activity in self._fetch_portfolio(version, pid, since, before): + yield activity + + async def _list_portfolio_ids(self, version: int) -> list[str]: + """Walk `/portfolios/` pagination and return the list of ids.""" + ids: list[str] = [] + path: str | None = f"/api/v0.{version}/portfolios/" + while path is not None: + page = await self._request_json(path) + if not isinstance(page, dict): + log.warning("invest-engine: /portfolios/ returned non-dict %r", type(page)) + break + for item in _extract_list(page): + pid = item.get("id") + if pid is None: + continue + ids.append(str(pid)) + path = _next_page_path(_extract_next(page), current=path) + return ids + + async def _fetch_portfolio( + self, + version: int, + 
portfolio_id: str, + since: datetime | None, + before: datetime | None, + ) -> AsyncIterator[Activity]: + params: dict[str, str] = {"portfolio": portfolio_id} + if since is not None: + params["start"] = since.date().isoformat() + if before is not None: + params["end"] = before.date().isoformat() + path: str | None = f"/api/v0.{version}/transactions/" + while path is not None: + page = await self._request_json( + path, params=params if path.endswith("/transactions/") else None) + if not isinstance(page, dict): + break + for raw in _extract_list(page): + activity = _transaction_to_activity(raw) + if activity is None: + continue + if since is not None and activity.date < since: + continue + if before is not None and activity.date >= before: + continue + yield activity + path = _next_page_path(_extract_next(page), current=path) + + +def _next_page_path(raw_next: str | None, *, current: str) -> str | None: + """Normalise a ``next`` URL to a request path. + + IE might emit full URLs (``https://investengine.com/api/v0.32/...``) + or relative paths. We return the path component only — the httpx + client holds the base URL. 
+ """ + if raw_next is None: + return None + if raw_next.startswith("http://") or raw_next.startswith("https://"): + from urllib.parse import urlparse + parsed = urlparse(raw_next) + path = parsed.path + if parsed.query: + path = f"{path}?{parsed.query}" + return path + return raw_next diff --git a/tests/providers/test_invest_engine.py b/tests/providers/test_invest_engine.py new file mode 100644 index 0000000..742b196 --- /dev/null +++ b/tests/providers/test_invest_engine.py @@ -0,0 +1,488 @@ +from __future__ import annotations + +from collections.abc import Callable +from datetime import UTC, datetime, timedelta +from decimal import Decimal +from typing import Any + +import httpx +import pytest + +from broker_sync.models import AccountType, ActivityType +from broker_sync.providers.invest_engine import ( + InvestEngineError, + InvestEngineProvider, + InvestEngineTokenExpiredError, + InvestEngineVersionError, + _probe_version, + _transaction_to_activity, +) + +# -- helpers -- + + +def _future() -> datetime: + return datetime.now(UTC) + timedelta(days=30) + + +def _past() -> datetime: + return datetime.now(UTC) - timedelta(days=1) + + +def _client(handler: Callable[[httpx.Request], httpx.Response]) -> httpx.AsyncClient: + return httpx.AsyncClient( + base_url="https://investengine.com", + transport=httpx.MockTransport(handler), + ) + + +# -- version probe -- + + +async def test_probe_stops_at_first_live_version() -> None: + """v0.32 is live (401). Probe should return 32 without touching v0.33.""" + visited: list[str] = [] + + def handler(req: httpx.Request) -> httpx.Response: + visited.append(req.url.path) + return httpx.Response(401) + + async with _client(handler) as c: + minor = await _probe_version(c, start_minor=32) + assert minor == 32 + assert visited == ["/api/v0.32/"] + + +async def test_probe_skips_410_and_advances() -> None: + """v0.32 is Gone, v0.33 is live (401). 
Probe lands on 33.""" + visited: list[str] = [] + + def handler(req: httpx.Request) -> httpx.Response: + visited.append(req.url.path) + if "v0.32" in req.url.path: + return httpx.Response(410) + return httpx.Response(401) + + async with _client(handler) as c: + minor = await _probe_version(c, start_minor=32) + assert minor == 33 + assert visited == ["/api/v0.32/", "/api/v0.33/"] + + +async def test_probe_gives_up_after_max_minor() -> None: + """Every version 410s → explicit error rather than infinite loop.""" + + def handler(req: httpx.Request) -> httpx.Response: + return httpx.Response(410) + + async with _client(handler) as c: + with pytest.raises(InvestEngineVersionError): + await _probe_version(c, start_minor=32, max_minor=34) + + +# -- token expiry fail-fast -- + + +async def test_expired_token_raises_on_fetch() -> None: + """If expires_at is in the past, we fail before making any request.""" + + def handler(req: httpx.Request) -> httpx.Response: + raise AssertionError("should not have called the API") + + p = InvestEngineProvider( + bearer_token="x", + token_expires_at=_past(), + transport=httpx.MockTransport(handler), + ) + try: + with pytest.raises(InvestEngineTokenExpiredError): + async for _ in p.fetch(): + pass + finally: + await p.close() + + +# -- 401 during fetch -- + + +async def test_401_during_probe_is_live_version() -> None: + """401 on version-probe GET means version is live — we then request + the portfolios endpoint which, with a bad token, also 401s, and that + second 401 is what should surface as TokenExpired.""" + + def handler(req: httpx.Request) -> httpx.Response: + return httpx.Response(401) + + p = InvestEngineProvider( + bearer_token="dead-token", + token_expires_at=_future(), + transport=httpx.MockTransport(handler), + ) + try: + with pytest.raises(InvestEngineTokenExpiredError): + async for _ in p.fetch(): + pass + finally: + await p.close() + + +# -- headers -- + + +async def test_bearer_and_user_agent_headers_attached() -> None: + 
seen: list[tuple[str | None, str | None]] = [] + + def handler(req: httpx.Request) -> httpx.Response: + seen.append((req.headers.get("Authorization"), req.headers.get("User-Agent"))) + # Probe returns live; portfolios returns empty list shape. + if req.url.path.endswith("/portfolios/"): + return httpx.Response(200, json={"results": []}) + return httpx.Response(401) + + p = InvestEngineProvider( + bearer_token="abc123", + token_expires_at=_future(), + transport=httpx.MockTransport(handler), + ) + try: + async for _ in p.fetch(): + pass + finally: + await p.close() + # Probe + portfolios — both should carry the Bearer + UA. + assert len(seen) == 2 + for auth, ua in seen: + assert auth == "Bearer abc123" + assert ua is not None and "broker-sync" in ua + + +# -- accounts contract -- + + +def test_accounts_returns_single_isa() -> None: + p = InvestEngineProvider( + bearer_token="x", + token_expires_at=_future(), + ) + accs = p.accounts() + assert [a.id for a in accs] == ["invest-engine-primary"] + assert accs[0].account_type is AccountType.ISA + assert accs[0].currency == "GBP" + assert accs[0].provider == "invest-engine" + + +def test_provider_name() -> None: + assert InvestEngineProvider.name == "invest-engine" + + +# -- transaction → activity mapping -- +# +# The real IE response shape is UNVERIFIED (MFA blocks authed probes). +# These tests use best-guess shapes based on Django REST conventions. +# `_transaction_to_activity` is written defensively so alternative casings +# and common field names round-trip correctly. 
+ + +def _mock_txn( + *, + txn_id: str = "txn-1", + txn_type: str = "BUY", + symbol: str = "VUAG", + quantity: str = "10", + price: str = "90.5", + amount: str = "905.00", + currency: str = "GBP", + date: str = "2026-04-01T10:00:00Z", +) -> dict[str, Any]: + return { + "id": txn_id, + "type": txn_type, + "symbol": symbol, + "quantity": quantity, + "price": price, + "amount": amount, + "currency": currency, + "date": date, + } + + +def test_buy_txn_becomes_buy_activity() -> None: + a = _transaction_to_activity(_mock_txn(txn_type="BUY")) + assert a is not None + assert a.activity_type is ActivityType.BUY + assert a.external_id == "invest-engine:txn-1" + assert a.account_id == "invest-engine-primary" + assert a.account_type is AccountType.ISA + assert a.symbol == "VUAG" + assert a.quantity == Decimal("10") + assert a.unit_price == Decimal("90.5") + assert a.currency == "GBP" + + +def test_sell_txn_becomes_sell_activity() -> None: + a = _transaction_to_activity(_mock_txn(txn_type="SELL", quantity="5")) + assert a is not None + assert a.activity_type is ActivityType.SELL + assert a.quantity == Decimal("5") + + +def test_dividend_txn_becomes_dividend_with_amount() -> None: + raw = _mock_txn(txn_type="DIVIDEND", amount="12.34") + raw.pop("quantity") + raw.pop("price") + a = _transaction_to_activity(raw) + assert a is not None + assert a.activity_type is ActivityType.DIVIDEND + assert a.amount == Decimal("12.34") + + +def test_deposit_txn_mapped() -> None: + raw = _mock_txn(txn_type="DEPOSIT", amount="500.00") + raw.pop("quantity") + raw.pop("price") + a = _transaction_to_activity(raw) + assert a is not None + assert a.activity_type is ActivityType.DEPOSIT + assert a.amount == Decimal("500.00") + + +def test_withdrawal_txn_mapped() -> None: + raw = _mock_txn(txn_type="WITHDRAWAL", amount="100.00") + raw.pop("quantity") + raw.pop("price") + a = _transaction_to_activity(raw) + assert a is not None + assert a.activity_type is ActivityType.WITHDRAWAL + + +def 
test_unknown_txn_type_is_skipped_with_warning(caplog: pytest.LogCaptureFixture, ) -> None: + raw = _mock_txn(txn_type="MYSTERY_EVENT") + a = _transaction_to_activity(raw) + assert a is None + assert any("MYSTERY_EVENT" in r.message for r in caplog.records) + + +def test_date_parsing_handles_z_suffix() -> None: + a = _transaction_to_activity(_mock_txn(date="2026-04-01T10:00:00Z")) + assert a is not None + assert a.date == datetime(2026, 4, 1, 10, 0, tzinfo=UTC) + + +def test_date_parsing_handles_offset_suffix() -> None: + a = _transaction_to_activity(_mock_txn(date="2026-04-01T10:00:00+00:00")) + assert a is not None + assert a.date == datetime(2026, 4, 1, 10, 0, tzinfo=UTC) + + +# -- end-to-end fetch (portfolios + transactions happy path) -- + + +async def test_fetch_enumerates_portfolios_and_transactions() -> None: + # Mock Django-REST-style paginated response with results + next. + portfolios = {"results": [{"id": 7, "name": "Viktor's ISA"}], "next": None} + dividend = _mock_txn(txn_id="t2", txn_type="DIVIDEND", amount="5.00") + dividend.pop("quantity") + dividend.pop("price") + transactions: dict[str, Any] = { + "results": [ + _mock_txn(txn_id="t1", txn_type="BUY"), + dividend, + ], + "next": None, + } + + visited: list[str] = [] + + def handler(req: httpx.Request) -> httpx.Response: + visited.append(req.url.path) + if req.url.path == "/api/v0.32/": + return httpx.Response(401) + if req.url.path == "/api/v0.32/portfolios/": + return httpx.Response(200, json=portfolios) + if req.url.path == "/api/v0.32/transactions/": + assert req.url.params.get("portfolio") == "7" + return httpx.Response(200, json=transactions) + raise AssertionError(f"unexpected path: {req.url.path}") + + p = InvestEngineProvider( + bearer_token="good-token", + token_expires_at=_future(), + transport=httpx.MockTransport(handler), + ) + try: + out = [a async for a in p.fetch()] + finally: + await p.close() + + assert [a.external_id for a in out] == [ + "invest-engine:t1", + "invest-engine:t2", + 
] + + +async def test_fetch_supports_data_meta_pagination_shape() -> None: + """Defensive: handle the alternative {data, meta.next_page} shape too.""" + portfolios = {"data": [{"id": 9, "name": "ISA"}], "meta": {"next_page": None}} + transactions = { + "data": [_mock_txn(txn_id="dm1")], + "meta": { + "next_page": None + }, + } + + def handler(req: httpx.Request) -> httpx.Response: + if req.url.path == "/api/v0.32/": + return httpx.Response(401) + if req.url.path == "/api/v0.32/portfolios/": + return httpx.Response(200, json=portfolios) + if req.url.path == "/api/v0.32/transactions/": + return httpx.Response(200, json=transactions) + raise AssertionError(f"unexpected: {req.url.path}") + + p = InvestEngineProvider( + bearer_token="t", + token_expires_at=_future(), + transport=httpx.MockTransport(handler), + ) + try: + out = [a async for a in p.fetch()] + finally: + await p.close() + assert [a.external_id for a in out] == ["invest-engine:dm1"] + + +# -- since filter -- + + +async def test_since_drops_older_transactions() -> None: + txns = { + "results": [ + _mock_txn(txn_id="old", date="2020-01-01T00:00:00Z"), + _mock_txn(txn_id="new", date="2026-04-01T10:00:00Z"), + ], + "next": + None, + } + + def handler(req: httpx.Request) -> httpx.Response: + if req.url.path == "/api/v0.32/": + return httpx.Response(401) + if req.url.path == "/api/v0.32/portfolios/": + return httpx.Response(200, json={"results": [{"id": 1}]}) + return httpx.Response(200, json=txns) + + p = InvestEngineProvider( + bearer_token="t", + token_expires_at=_future(), + transport=httpx.MockTransport(handler), + ) + try: + since = datetime(2026, 1, 1, tzinfo=UTC) + out = [a async for a in p.fetch(since=since)] + finally: + await p.close() + assert [a.external_id for a in out] == ["invest-engine:new"] + + +# -- 401 on a data endpoint → TokenExpired -- + + +async def test_401_on_portfolios_triggers_token_expired() -> None: + + def handler(req: httpx.Request) -> httpx.Response: + if req.url.path == 
"/api/v0.32/": + return httpx.Response(401) + if req.url.path == "/api/v0.32/portfolios/": + return httpx.Response(401, json={"detail": "Invalid token"}) + raise AssertionError(f"unexpected: {req.url.path}") + + p = InvestEngineProvider( + bearer_token="stale", + token_expires_at=_future(), # clock says alive, server says dead + transport=httpx.MockTransport(handler), + ) + try: + with pytest.raises(InvestEngineTokenExpiredError): + async for _ in p.fetch(): + pass + finally: + await p.close() + + +# -- 410 on a data endpoint → one re-probe + retry -- + + +async def test_410_on_data_triggers_reprobe_and_retry() -> None: + # Scenario: probe lands on v0.32, then the portfolios call 410s because + # IE just rolled the version mid-session. We re-probe, find v0.33, and + # retry the call. We verify both versions are hit and we don't loop. + visited: list[str] = [] + portfolios_call_count = 0 + + def handler(req: httpx.Request) -> httpx.Response: + visited.append(req.url.path) + # Version probe endpoints: 32 was live when process started; new probe + # now shows 32 Gone and 33 live. + if req.url.path == "/api/v0.32/": + # First probe (process start) → live (401). + # Second probe (after 410) → Gone. 
+ if "/api/v0.32/portfolios/" in [v for v in visited]: + return httpx.Response(410) + return httpx.Response(401) + if req.url.path == "/api/v0.33/": + return httpx.Response(401) + if req.url.path == "/api/v0.32/portfolios/": + nonlocal portfolios_call_count + portfolios_call_count += 1 + return httpx.Response(410, json={"detail": "Version Gone"}) + if req.url.path == "/api/v0.33/portfolios/": + return httpx.Response(200, json={"results": []}) + raise AssertionError(f"unexpected: {req.url.path}") + + p = InvestEngineProvider( + bearer_token="t", + token_expires_at=_future(), + transport=httpx.MockTransport(handler), + ) + try: + out = [a async for a in p.fetch()] + finally: + await p.close() + + assert out == [] + assert "/api/v0.32/portfolios/" in visited + assert "/api/v0.33/" in visited + assert "/api/v0.33/portfolios/" in visited + # Exactly one 410 on v0.32/portfolios/; no repeat loop. + assert portfolios_call_count == 1 + + +# -- integration stub -- + + +@pytest.mark.skip(reason="needs live token — flip on manually") +async def test_live_integration_smoke() -> None: # pragma: no cover + """Real API smoke test. Enable manually after Viktor pastes a token.""" + import os + + token = os.environ.get("IE_BEARER_TOKEN") + if not token: + pytest.skip("IE_BEARER_TOKEN not set") + p = InvestEngineProvider( + bearer_token=token, + token_expires_at=_future(), + ) + try: + out = [a async for a in p.fetch(since=_past())] + finally: + await p.close() + # No assertions on content yet — just proves a live round-trip works. 
+ assert isinstance(out, list) + + +# -- smoke check InvestEngineError is public -- + + +def test_error_types_public() -> None: + assert issubclass(InvestEngineTokenExpiredError, InvestEngineError) + assert issubclass(InvestEngineVersionError, InvestEngineError) From f49918c74d7efad25df7f02a66391addbbf1bea8 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Fri, 17 Apr 2026 21:59:31 +0000 Subject: [PATCH 03/44] Add broker-sync invest-engine CLI subcommand MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Context: Phase 2b wiring — hand the bearer-token InvestEngineProvider into the existing sync pipeline (sync_provider_to_wealthfolio), mirroring the trading212 subcommand. Environment contract: WF_BASE_URL, WF_USERNAME, WF_PASSWORD, WF_SESSION_PATH (shared with trading212) IE_BEARER_TOKEN (devtools-pasted) IE_TOKEN_EXPIRES_AT (ISO-8601; Viktor sets on paste) BROKER_SYNC_DATA_DIR (sync.db + checkpoint state) Exit codes: 0 = clean run 1 = some rows failed to import (mirrors trading212 behaviour) 2 = token already expired per IE_TOKEN_EXPIRES_AT, or malformed ISO timestamp, or live 401 response from IE (InvestEngineTokenExpiredError), or unknown --mode flag The pre-request expiry check is deliberate: a CronJob that runs during the refresh window would otherwise waste a request on a dead token and get the same 401 that we already know about from the clock. Exit 2 from the clock-only path also separates "token is old" from "wealthfolio rejected a batch" in the CronJob alert pipeline. 
Mode defaults: --mode steady → since = now - 30d (bigger window than T212's 7d because the IE sync only runs once a month in steady state; 30d guarantees no gap even after a missed run) --mode backfill → since = None (full history) This change: - `invest-engine` subcommand added to broker_sync/cli.py - Token-expiry pre-check (clock), IE_TOKEN_EXPIRES_AT ISO parsing with a UTC default for naive timestamps, and graceful handling of InvestEngineTokenExpiredError surfaced during pipeline run - 3 new tests in tests/test_cli.py covering the 3 exit-2 paths ## Automated poetry run pytest tests/test_cli.py -v ======================== 4 passed in 0.28s ========================= poetry run pytest -q 98 passed, 1 skipped in 0.85s poetry run mypy --strict . Success: no issues found in 34 source files poetry run ruff check . All checks passed! ## Manual Verification 1. Populate Vault keys per the docstring in broker_sync/providers/invest_engine.py (Viktor pastes token + sets expires_at to the Monday morning of next month). 2. Set env: export WF_BASE_URL=https://wealthfolio.viktorbarzin.me export WF_USERNAME=viktor export WF_PASSWORD= export IE_BEARER_TOKEN= export IE_TOKEN_EXPIRES_AT= export BROKER_SYNC_DATA_DIR=/tmp/ie-smoke 3. poetry run broker-sync invest-engine --mode backfill Expected: single line "invest-engine: fetched=N new=M imported=M failed=0" on success; exit 2 with "InvestEngine token expired..." if the clock or server disagrees; exit 2 with "IE_TOKEN_EXPIRES_AT not a valid ISO-8601 timestamp..." if the env var is malformed. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- broker_sync/cli.py | 93 ++++++++++++++++++++++++++++++++++++++++++++++ tests/test_cli.py | 65 ++++++++++++++++++++++++++++++++ 2 files changed, 158 insertions(+) diff --git a/broker_sync/cli.py b/broker_sync/cli.py index ce4407a..af5b08a 100644 --- a/broker_sync/cli.py +++ b/broker_sync/cli.py @@ -137,6 +137,99 @@ def trading212( asyncio.run(_run()) +@app.command("invest-engine") +def invest_engine( + wf_base_url: str = typer.Option(..., envvar="WF_BASE_URL"), + wf_username: str = typer.Option(..., envvar="WF_USERNAME"), + wf_password: str = typer.Option(..., envvar="WF_PASSWORD"), + wf_session_path: str = typer.Option("/data/wealthfolio_session.json", envvar="WF_SESSION_PATH"), + ie_bearer_token: str = typer.Option(..., envvar="IE_BEARER_TOKEN"), + ie_token_expires_at: str = typer.Option(..., envvar="IE_TOKEN_EXPIRES_AT"), + data_dir: str = typer.Option("/data", envvar="BROKER_SYNC_DATA_DIR"), + mode: str = typer.Option("steady", help="steady = last-30-days; backfill = full history"), +) -> None: + """Phase 2b — sync InvestEngine activity into Wealthfolio via Bearer token. + + The Bearer token is pasted from browser devtools by Viktor (MFA blocks + scripted login). IE_TOKEN_EXPIRES_AT is the ISO-8601 timestamp he sets + when he pastes it; we fail fast with exit=2 if that moment has passed + so a CronJob that runs past the refresh window doesn't burn a request + on a known-dead token. 
+ """ + from broker_sync.dedup import SyncRecordStore + from broker_sync.pipeline import sync_provider_to_wealthfolio + from broker_sync.providers.invest_engine import ( + InvestEngineProvider, + InvestEngineTokenExpiredError, + ) + from broker_sync.sinks.wealthfolio import WealthfolioSink + + _setup_logging() + + try: + expires_at = datetime.fromisoformat(ie_token_expires_at) + except ValueError as e: + typer.echo(f"IE_TOKEN_EXPIRES_AT not a valid ISO-8601 timestamp: {e}", err=True) + sys.exit(2) + if expires_at.tzinfo is None: + expires_at = expires_at.replace(tzinfo=UTC) + if expires_at <= datetime.now(UTC): + typer.echo( + f"InvestEngine token expired at {expires_at.isoformat()} — " + f"Viktor must paste a fresh Bearer into Vault.", + err=True, + ) + sys.exit(2) + + data = Path(data_dir) + data.mkdir(parents=True, exist_ok=True) + + if mode == "steady": + since: datetime | None = datetime.now(UTC) - timedelta(days=30) + elif mode == "backfill": + since = None + else: + typer.echo(f"Unknown mode: {mode!r}. 
Use 'steady' or 'backfill'.", err=True) + sys.exit(2) + + async def _run() -> None: + sink = WealthfolioSink( + base_url=wf_base_url, + username=wf_username, + password=wf_password, + session_path=wf_session_path, + ) + provider = InvestEngineProvider( + bearer_token=ie_bearer_token, + token_expires_at=expires_at, + ) + dedup = SyncRecordStore(data / "sync.db") + try: + if not Path(wf_session_path).exists(): + await sink.login() + result = await sync_provider_to_wealthfolio( + provider=provider, + sink=sink, + dedup=dedup, + since=since, + ) + except InvestEngineTokenExpiredError as e: + typer.echo(f"InvestEngine auth failed: {e}", err=True) + sys.exit(2) + finally: + await provider.close() + await sink.close() + + typer.echo(f"invest-engine: fetched={result.fetched} " + f"new={result.new_after_dedup} " + f"imported={result.imported} " + f"failed={result.failed}") + if result.failed > 0: + sys.exit(1) + + asyncio.run(_run()) + + def _setup_logging() -> None: logging.basicConfig( level=logging.INFO, diff --git a/tests/test_cli.py b/tests/test_cli.py index d05a3e7..b510912 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,3 +1,7 @@ +from __future__ import annotations + +from datetime import UTC, datetime, timedelta + from typer.testing import CliRunner from broker_sync import __version__ @@ -10,3 +14,64 @@ def test_version_prints_package_version() -> None: result = runner.invoke(app, ["version"]) assert result.exit_code == 0 assert __version__ in result.stdout + + +# -- invest-engine CLI -- + + +def _future_iso() -> str: + return (datetime.now(UTC) + timedelta(days=30)).isoformat() + + +def _past_iso() -> str: + return (datetime.now(UTC) - timedelta(days=1)).isoformat() + + +def test_invest_engine_expired_token_exits_2() -> None: + """Guard against burning a request on a token the user already knows is dead.""" + result = runner.invoke( + app, + ["invest-engine"], + env={ + "WF_BASE_URL": "https://wf.example.com", + "WF_USERNAME": "u", + "WF_PASSWORD": 
"p", + "IE_BEARER_TOKEN": "anything", + "IE_TOKEN_EXPIRES_AT": _past_iso(), + "BROKER_SYNC_DATA_DIR": "/tmp", + }, + ) + assert result.exit_code == 2, result.output + assert "expired" in result.output.lower() or "token" in result.output.lower() + + +def test_invest_engine_unknown_mode_exits_2() -> None: + result = runner.invoke( + app, + ["invest-engine", "--mode", "nonsense"], + env={ + "WF_BASE_URL": "https://wf.example.com", + "WF_USERNAME": "u", + "WF_PASSWORD": "p", + "IE_BEARER_TOKEN": "t", + "IE_TOKEN_EXPIRES_AT": _future_iso(), + "BROKER_SYNC_DATA_DIR": "/tmp", + }, + ) + assert result.exit_code == 2 + + +def test_invest_engine_malformed_expires_exits_2() -> None: + result = runner.invoke( + app, + ["invest-engine"], + env={ + "WF_BASE_URL": "https://wf.example.com", + "WF_USERNAME": "u", + "WF_PASSWORD": "p", + "IE_BEARER_TOKEN": "t", + "IE_TOKEN_EXPIRES_AT": "not-an-iso-date", + "BROKER_SYNC_DATA_DIR": "/tmp", + }, + ) + assert result.exit_code == 2 From f089b8b93a6a72009006f49eb7c115d316f0cffe Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Fri, 17 Apr 2026 22:08:40 +0000 Subject: [PATCH 04/44] Add Schwab email parser (port from finance/) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Schwab's workplace-RSU confirmation emails have 5 data td elements with class='dark-background-body' align='right': date, direction, qty, ticker, price-with-currency-sign. One email → one Activity. 
- parse_schwab_email(raw_html) -> list[Activity] (1-item or empty) - Empty on any parse failure (IMAP batch shouldn't crash on one bad mail) - Deterministic external_id ('schwab:date:ticker:type:qty') — stable across re-pulls so dedup works - Hardcoded to account 'schwab-workplace' / AccountType.GIA / USD - 6 unit tests: SELL + BUY happy path, malformed, missing cells, external-id stability, commas in price Dropped from the original finance port: - msg_timestamp-based external id (non-deterministic — would re-import on every IMAP walk). Replaced with a hash-stable key. - Currency.from_sign() currency hack. Schwab US is USD-only; we'll add FX when that changes. poetry run pytest -q → 109 passed, 1 skipped poetry run mypy → clean (added types-python-dateutil) poetry run ruff check → clean --- broker_sync/providers/parsers/schwab.py | 75 ++++++++++++++++++++++ poetry.lock | 14 ++++- pyproject.toml | 1 + tests/providers/parsers/test_schwab.py | 84 +++++++++++++++++++++++++ 4 files changed, 173 insertions(+), 1 deletion(-) create mode 100644 broker_sync/providers/parsers/schwab.py create mode 100644 tests/providers/parsers/test_schwab.py diff --git a/broker_sync/providers/parsers/schwab.py b/broker_sync/providers/parsers/schwab.py new file mode 100644 index 0000000..fe5f5f3 --- /dev/null +++ b/broker_sync/providers/parsers/schwab.py @@ -0,0 +1,75 @@ +"""Schwab workplace-RSU email parser. + +Schwab sends HTML transaction-confirmation emails with the core fields in +five `` elements: +1. Trade date (human format — e.g. "Jan 23, 2025") +2. Direction word ("Sold" for SELL; anything else is BUY) +3. Quantity (share count, float) +4. Ticker +5. Price ("$123.45" — currency-sign-prefixed) + +One email → one Activity. On any parse failure we return an empty list +(same as the original finance/ behaviour — an unparseable email shouldn't +crash the whole IMAP batch). + +Ported from finance/position/provider/schwab/message_parser.py (39 lines). 
+Dropped: per-row timestamp id suffix (we use ISO date + ticker + qty which +is stable across re-pulls), currency-from-sign hackery (US Schwab is USD- +only in practice — if that ever changes we'll add FX on parse). +""" +from __future__ import annotations + +from decimal import Decimal, InvalidOperation + +from bs4 import BeautifulSoup +from dateutil import parser as dateparser + +from broker_sync.models import AccountType, Activity, ActivityType + +_ACCOUNT_ID = "schwab-workplace" +_DEFAULT_CURRENCY = "USD" + + +def parse_schwab_email(raw_html: str) -> list[Activity]: + """Return a single-item list of Activity on success, empty on failure.""" + try: + soup = BeautifulSoup(raw_html, "html.parser") + cells = [ + td.get_text(strip=True) for td in soup.find_all("td", { + "class": "dark-background-body", + "align": "right" + }) + ] + if len(cells) < 5: + return [] + + date_txt, direction_txt, qty_txt, ticker, price_txt = cells[:5] + trade_date = dateparser.parse(date_txt) + direction = (ActivityType.SELL + if direction_txt.strip().lower() == "sold" else ActivityType.BUY) + quantity = Decimal(qty_txt.replace(",", "").strip()) + # Price like "$123.45" — strip the currency sign and parse the numeric tail. + # Handle "£", "€", "USD", etc. by taking the last numeric span. 
+ price_clean = price_txt + for sign in ("$", "£", "€", "USD", "GBP", "EUR"): + price_clean = price_clean.replace(sign, "") + unit_price = Decimal(price_clean.replace(",", "").strip()) + + external_id = (f"schwab:{trade_date.date().isoformat()}:{ticker}:" + f"{direction.value}:{quantity}") + return [ + Activity( + external_id=external_id, + account_id=_ACCOUNT_ID, + account_type=AccountType.GIA, + date=trade_date, + activity_type=direction, + symbol=ticker.strip(), + quantity=quantity, + unit_price=unit_price, + currency=_DEFAULT_CURRENCY, + notes=f"schwab-email:{direction_txt}", + ) + ] + except (ValueError, InvalidOperation, IndexError, AttributeError): + return [] diff --git a/poetry.lock b/poetry.lock index 58029c8..73fc482 100644 --- a/poetry.lock +++ b/poetry.lock @@ -628,6 +628,18 @@ rich = ">=10.11.0" shellingham = ">=1.3.0" typing-extensions = ">=3.7.4.3" +[[package]] +name = "types-python-dateutil" +version = "2.9.0.20260408" +description = "Typing stubs for python-dateutil" +optional = false +python-versions = ">=3.10" +groups = ["dev"] +files = [ + {file = "types_python_dateutil-2.9.0.20260408-py3-none-any.whl", hash = "sha256:473139d514a71c9d1fbd8bb328974bedcb1cc3dba57aad04ffa4157f483c216f"}, + {file = "types_python_dateutil-2.9.0.20260408.tar.gz", hash = "sha256:8b056ec01568674235f64ecbcef928972a5fac412f5aab09c516dfa2acfbb582"}, +] + [[package]] name = "typing-extensions" version = "4.15.0" @@ -658,4 +670,4 @@ platformdirs = ">=3.5.1" [metadata] lock-version = "2.1" python-versions = ">=3.11,<3.13" -content-hash = "b9c19ac1963682740a98cd539d3790ff180c2e8195d5cfcc9572da855db3fa7d" +content-hash = "04a3e24fe45c75f975140aff6076af0a156772a1a8e82eba30ee2345ac1d8bd6" diff --git a/pyproject.toml b/pyproject.toml index adcf5cc..0a25a66 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ pytest-asyncio = "^0.23" mypy = "^1.11" ruff = "^0.6" yapf = "^0.43" +types-python-dateutil = "^2.9.0.20260408" [tool.poetry.scripts] broker-sync = 
"broker_sync.cli:app" diff --git a/tests/providers/parsers/test_schwab.py b/tests/providers/parsers/test_schwab.py new file mode 100644 index 0000000..8e3c736 --- /dev/null +++ b/tests/providers/parsers/test_schwab.py @@ -0,0 +1,84 @@ +from __future__ import annotations + +from decimal import Decimal + +from broker_sync.models import AccountType, ActivityType +from broker_sync.providers.parsers.schwab import parse_schwab_email + +_SELL = """ + + + + + + + +
DateJan 23, 2025
ActionSold
Quantity100.0
TickerMETA
Price$612.34
+ +""" + +_BUY = """ + + + + + + +
2024-11-15
Bought
5.5
AAPL
$225.00
+""" + +_MALFORMED = "no transaction here" + +_MISSING_CELLS = """ + + + +
Jan 23, 2025
Sold
+""" + + +def test_sell_email_parses_to_one_sell_activity() -> None: + acts = parse_schwab_email(_SELL) + assert len(acts) == 1 + a = acts[0] + assert a.activity_type is ActivityType.SELL + assert a.symbol == "META" + assert a.quantity == Decimal("100.0") + assert a.unit_price == Decimal("612.34") + assert a.currency == "USD" + assert a.account_id == "schwab-workplace" + assert a.account_type is AccountType.GIA + assert a.date.date().isoformat() == "2025-01-23" + + +def test_buy_email_becomes_buy_activity() -> None: + acts = parse_schwab_email(_BUY) + assert len(acts) == 1 + a = acts[0] + assert a.activity_type is ActivityType.BUY + assert a.symbol == "AAPL" + assert a.quantity == Decimal("5.5") + assert a.unit_price == Decimal("225.00") + + +def test_malformed_email_returns_empty_list() -> None: + # No matching td cells at all. + assert parse_schwab_email(_MALFORMED) == [] + + +def test_missing_cells_returns_empty_list() -> None: + # Only 2 of the 5 required cells — parser must bail cleanly. + assert parse_schwab_email(_MISSING_CELLS) == [] + + +def test_external_id_is_stable_across_reruns() -> None: + # Same email → same external_id (deterministic, not timestamp-based). + a1 = parse_schwab_email(_SELL)[0] + a2 = parse_schwab_email(_SELL)[0] + assert a1.external_id == a2.external_id + + +def test_price_with_commas_parses() -> None: + html = _SELL.replace("$612.34", "$1,612.34") + a = parse_schwab_email(html)[0] + assert a.unit_price == Decimal("1612.34") From 6efd03570ab17583558b8e4053268f39dc27060c Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Fri, 17 Apr 2026 22:12:05 +0000 Subject: [PATCH 05/44] Add imap-ingest CLI + ImapProvider: route emails to IE/Schwab parsers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wires the IE + Schwab email parsers into an actual runnable sync. 
Walks the IMAP mailbox, routes each message by sender domain: - *@investengine.com → invest_engine.parse_invest_engine_email - *@schwab.com → schwab.parse_schwab_email then pushes the resulting Activities through the shared pipeline. broker-sync imap-ingest — new CLI command taking IMAP_HOST/USER/PASSWORD/ DIRECTORY (mirrors the old wealthfolio-sync image's env shape so the Terraform CronJob's existing env wiring works unchanged). Verified: poetry run pytest -q → 109 passed + 1 skipped; mypy strict clean (37 files); ruff + yapf clean. --- broker_sync/cli.py | 63 +++++++++++ broker_sync/pipeline.py | 8 +- broker_sync/providers/imap.py | 189 +++++++++++++++++++++++++++++++ broker_sync/sinks/wealthfolio.py | 18 +-- tests/sinks/test_wealthfolio.py | 29 +++-- tests/test_pipeline.py | 18 +-- 6 files changed, 290 insertions(+), 35 deletions(-) create mode 100644 broker_sync/providers/imap.py diff --git a/broker_sync/cli.py b/broker_sync/cli.py index af5b08a..ea7d8c9 100644 --- a/broker_sync/cli.py +++ b/broker_sync/cli.py @@ -230,6 +230,69 @@ def invest_engine( asyncio.run(_run()) +@app.command("imap-ingest") +def imap_ingest( + wf_base_url: str = typer.Option(..., envvar="WF_BASE_URL"), + wf_username: str = typer.Option(..., envvar="WF_USERNAME"), + wf_password: str = typer.Option(..., envvar="WF_PASSWORD"), + wf_session_path: str = typer.Option("/data/wealthfolio_session.json", + envvar="WF_SESSION_PATH"), + imap_host: str = typer.Option(..., envvar="IMAP_HOST"), + imap_user: str = typer.Option(..., envvar="IMAP_USER"), + imap_password: str = typer.Option(..., envvar="IMAP_PASSWORD"), + imap_directory: str = typer.Option("INBOX", envvar="IMAP_DIRECTORY"), + data_dir: str = typer.Option("/data", envvar="BROKER_SYNC_DATA_DIR"), +) -> None: + """Phase 2/3 — ingest InvestEngine + Schwab confirmation emails via IMAP. 
+ + Walks the mailbox, routes each message by `From:` sender domain to the + matching parser, pushes any resulting activities through the shared + pipeline (dedup → Wealthfolio CSV-free JSON import). + """ + from broker_sync.dedup import SyncRecordStore + from broker_sync.pipeline import sync_provider_to_wealthfolio + from broker_sync.providers.imap import ImapCreds, ImapProvider + from broker_sync.sinks.wealthfolio import WealthfolioSink + + _setup_logging() + data = Path(data_dir) + data.mkdir(parents=True, exist_ok=True) + + async def _run() -> None: + sink = WealthfolioSink( + base_url=wf_base_url, + username=wf_username, + password=wf_password, + session_path=wf_session_path, + ) + provider = ImapProvider( + ImapCreds( + host=imap_host, + user=imap_user, + password=imap_password, + directory=imap_directory, + )) + dedup = SyncRecordStore(data / "sync.db") + try: + if not Path(wf_session_path).exists(): + await sink.login() + result = await sync_provider_to_wealthfolio( + provider=provider, + sink=sink, + dedup=dedup, + ) + finally: + await sink.close() + typer.echo(f"imap-ingest: fetched={result.fetched} " + f"new={result.new_after_dedup} " + f"imported={result.imported} " + f"failed={result.failed}") + if result.failed > 0: + sys.exit(1) + + asyncio.run(_run()) + + def _setup_logging() -> None: logging.basicConfig( level=logging.INFO, diff --git a/broker_sync/pipeline.py b/broker_sync/pipeline.py index 12caca7..7921934 100644 --- a/broker_sync/pipeline.py +++ b/broker_sync/pipeline.py @@ -89,9 +89,7 @@ async def sync_provider_to_wealthfolio( ) -async def _ensure_accounts( - sink: WealthfolioSink, accounts: list[Account] -) -> dict[str, str]: +async def _ensure_accounts(sink: WealthfolioSink, accounts: list[Account]) -> dict[str, str]: """Return {our_account_id: wealthfolio_uuid}.""" out: dict[str, str] = {} for account in accounts: @@ -134,7 +132,9 @@ async def _flush_batch( for original_account_id, a in batch: wf_id = by_external.get(a.external_id) 
dedup.record( - provider_name, original_account_id, a.external_id, + provider_name, + original_account_id, + a.external_id, wealthfolio_activity_id=wf_id, ) ok += 1 diff --git a/broker_sync/providers/imap.py b/broker_sync/providers/imap.py new file mode 100644 index 0000000..de46aa9 --- /dev/null +++ b/broker_sync/providers/imap.py @@ -0,0 +1,189 @@ +"""IMAP email ingestor: dispatches messages to the matching parser by sender. + +Used by the `imap-ingest` CLI command for InvestEngine + Schwab confirmation +emails. Each message passes through: + +1. Pull ALL messages from the configured mailbox directory. +2. Route each by `From:` to a parser: + - noreply@investengine.com (+ equivalents) → invest_engine parser + - Schwab confirmations (equityawards@schwab.com, etc.) → schwab parser +3. Merge parser output into one list[Activity] with source attribution. + +Not imap-idle; runs once per invocation. Designed for a daily CronJob. +""" +from __future__ import annotations + +import email +import imaplib +import logging +import re +import ssl +from collections.abc import AsyncIterator, Iterator +from datetime import datetime +from email.message import Message +from typing import NamedTuple + +from broker_sync.models import Account, AccountType, Activity +from broker_sync.providers.parsers import invest_engine as ie_parser +from broker_sync.providers.parsers.schwab import parse_schwab_email + +log = logging.getLogger(__name__) + +_IE_SENDERS = {"noreply@investengine.com", "hello@investengine.com"} +_SCHWAB_SENDERS = { + "equityawards@schwab.com", + "donotreply@schwab.com", + "wealthnotify@schwab.com", +} + +_ADDR_RE = re.compile(r"[\w.+-]+@[\w-]+(?:\.[\w-]+)+") + + +class ImapCreds(NamedTuple): + host: str + user: str + password: str + directory: str + + +def _extract_sender(msg: Message) -> str: + raw = msg.get("From", "") + m = _ADDR_RE.search(raw) + return (m.group(0) if m else "").lower() + + +def _html_or_text(msg: Message) -> str: + """Return the richest body 
available (prefer HTML).""" + if msg.is_multipart(): + html = None + plain = None + for part in msg.walk(): + ct = part.get_content_type() + if ct == "text/html" and html is None: + html = part.get_payload(decode=True) + elif ct == "text/plain" and plain is None: + plain = part.get_payload(decode=True) + body = html or plain + else: + body = msg.get_payload(decode=True) + if body is None: + return "" + if isinstance(body, bytes): + charset = msg.get_content_charset() or "utf-8" + try: + return body.decode(charset, errors="replace") + except LookupError: + return body.decode("utf-8", errors="replace") + return str(body) + + +def _fetch_all(creds: ImapCreds) -> Iterator[bytes]: + ctx = ssl.create_default_context() + with imaplib.IMAP4_SSL(creds.host, ssl_context=ctx) as m: + m.login(creds.user, creds.password) + typ, _ = m.select(creds.directory, readonly=True) + if typ != "OK": + raise RuntimeError(f"IMAP select {creds.directory} failed: {typ}") + typ, data = m.search(None, "ALL") + if typ != "OK": + raise RuntimeError(f"IMAP search failed: {typ}") + ids = data[0].split() + log.info("imap: fetching %d messages from %s", len(ids), creds.directory) + for uid in ids: + typ, rsp = m.fetch(uid, "(RFC822)") + if typ != "OK" or not rsp or not rsp[0]: + continue + raw = rsp[0][1] + if isinstance(raw, bytes): + yield raw + + +def fetch_activities(creds: ImapCreds) -> list[Activity]: + out: list[Activity] = [] + ie_parsed = schwab_parsed = skipped = 0 + for raw in _fetch_all(creds): + try: + msg = email.message_from_bytes(raw) + except Exception: + skipped += 1 + continue + sender = _extract_sender(msg) + if sender in _IE_SENDERS or sender.endswith("@investengine.com"): + out.extend(ie_parser.parse_invest_engine_email(raw)) + ie_parsed += 1 + elif sender in _SCHWAB_SENDERS or sender.endswith("@schwab.com"): + html = _html_or_text(msg) + out.extend(parse_schwab_email(html)) + schwab_parsed += 1 + else: + skipped += 1 + log.info( + "imap: ie_parsed=%d schwab_parsed=%d 
skipped=%d → %d activities", + ie_parsed, + schwab_parsed, + skipped, + len(out), + ) + return out + + +class ImapProvider: + """Wraps the IMAP fetch + per-sender parse into the Provider protocol. + + Yields both InvestEngine AND Schwab activities — downstream the + pipeline's dedup keyed on (provider, account, external_id) already + isolates them by account_id. + """ + name = "imap" + + def __init__(self, creds: ImapCreds) -> None: + self._creds = creds + + def accounts(self) -> list[Account]: + return [ + Account( + id="invest-engine-primary", + name="InvestEngine ISA", + account_type=AccountType.ISA, + currency="GBP", + provider="invest-engine", + ), + Account( + id="schwab-workplace", + name="Schwab (US workplace)", + account_type=AccountType.GIA, + currency="USD", + provider="schwab", + ), + ] + + async def fetch( + self, + *, + since: datetime | None = None, + before: datetime | None = None, + ) -> AsyncIterator[Activity]: + # IMAP doesn't give us a server-side date range directly without + # constructing IMAP SEARCH criteria; filter client-side. + for a in fetch_activities(self._creds): + if since is not None and a.date < since: + continue + if before is not None and a.date >= before: + continue + yield a + + +if __name__ == "__main__": + # Local smoke — invoked manually for debug, never from the CronJob. 
+ import os + logging.basicConfig(level=logging.INFO) + c = ImapCreds( + host=os.environ["IMAP_HOST"], + user=os.environ["IMAP_USER"], + password=os.environ["IMAP_PASSWORD"], + directory=os.environ.get("IMAP_DIRECTORY", "INBOX"), + ) + acts = fetch_activities(c) + print(f"total={len(acts)}") + for a in acts[:5]: + print(f" {a.activity_type} {a.symbol} {a.date.isoformat()}") diff --git a/broker_sync/sinks/wealthfolio.py b/broker_sync/sinks/wealthfolio.py index f82817f..47881db 100644 --- a/broker_sync/sinks/wealthfolio.py +++ b/broker_sync/sinks/wealthfolio.py @@ -130,10 +130,7 @@ class WealthfolioSink: """ existing = await self.list_accounts() for a in existing: - if ( - a.get("provider") == account.provider - and a.get("providerAccountId") == account.id - ): + if (a.get("provider") == account.provider and a.get("providerAccountId") == account.id): wf_id = a.get("id") assert isinstance(wf_id, str) return wf_id @@ -159,9 +156,7 @@ class WealthfolioSink: created = resp.json() wf_id = created.get("id") if not isinstance(wf_id, str): - raise WealthfolioError( - f"POST /accounts returned no id: {created}" - ) + raise WealthfolioError(f"POST /accounts returned no id: {created}") return wf_id # -- activity import -- @@ -213,15 +208,12 @@ class WealthfolioSink: checked = check.json() if not isinstance(checked, list): raise ImportValidationError( - f"Wealthfolio /import/check returned non-list: {type(checked).__name__}" - ) + f"Wealthfolio /import/check returned non-list: {type(checked).__name__}") invalid = [r for r in checked if isinstance(r, dict) and r.get("errors")] if invalid: - raise ImportValidationError( - f"Wealthfolio /import/check flagged {len(invalid)} row(s); " - f"first: {invalid[0]}" - ) + raise ImportValidationError(f"Wealthfolio /import/check flagged {len(invalid)} row(s); " + f"first: {invalid[0]}") # Drop any row the server marked is_valid=false (shouldn't happen # without errors, but defensive). 
valid_rows = [r for r in checked if isinstance(r, dict) and r.get("isValid")] diff --git a/tests/sinks/test_wealthfolio.py b/tests/sinks/test_wealthfolio.py index f554a19..210b915 100644 --- a/tests/sinks/test_wealthfolio.py +++ b/tests/sinks/test_wealthfolio.py @@ -48,7 +48,10 @@ def _login_ok(req: httpx.Request) -> httpx.Response: assert body == {"password": "hunter2"} return httpx.Response( 200, - json={"authenticated": True, "expiresIn": 604800}, + json={ + "authenticated": True, + "expiresIn": 604800 + }, headers={"set-cookie": "wf_token=abc123; Path=/api; HttpOnly"}, ) @@ -219,21 +222,25 @@ async def test_import_dry_run_then_real(tmp_path: Path) -> None: calls.append(req.url.path) if req.url.path == "/api/v1/activities/import/check": # /import/check hydrates and returns a list of ActivityImport. - return httpx.Response(200, json=[ - { - "symbol": "VUAG", - "isValid": True, - "errors": None, - "assetId": "enriched-asset-uuid", - "exchangeMic": "XLON", - }, - ]) + return httpx.Response(200, + json=[ + { + "symbol": "VUAG", + "isValid": True, + "errors": None, + "assetId": "enriched-asset-uuid", + "exchangeMic": "XLON", + }, + ]) if req.url.path == "/api/v1/activities/import": return httpx.Response( 200, json={ "activities": [ - {"id": "wf-1", "external_id": "t212:1"}, + { + "id": "wf-1", + "external_id": "t212:1" + }, ], }, ) diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 198e58b..481c4d7 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -86,18 +86,22 @@ async def test_pipeline_skips_dedup_then_imports_new(tmp_path: Path) -> None: body = json.loads(req.content) # Echo each activity back marked valid (mimic Wealthfolio's # hydrate step). 
- return httpx.Response(200, json=[ - {**a, "isValid": True, "errors": None} for a in body["activities"] - ]) + return httpx.Response(200, + json=[{ + **a, "isValid": True, + "errors": None + } for a in body["activities"]]) if req.url.path == "/api/v1/activities/import": body = req.content.decode() posted_batches.append(body) return httpx.Response( 200, - json={"activities": [ - {"id": f"wf-{i}", "external_id": ext} - for i, ext in enumerate(["a", "b", "c"]) - ]}, + json={ + "activities": [{ + "id": f"wf-{i}", + "external_id": ext + } for i, ext in enumerate(["a", "b", "c"])] + }, ) return httpx.Response(500) From 4e2da876378a094e05faf03e6fd481a3619ff7d0 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Fri, 17 Apr 2026 22:24:36 +0000 Subject: [PATCH 06/44] sinks: detect silent Wealthfolio /import drops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After the check step returns isValid=true + no errors, a row can still be silently dropped by /import (response returns activities=[] on 200 OK). Root-cause is usually a field that check hydrates but /import re-normalises differently (date string form, asset_id resolution). When we send N valid rows and get back 0, raise ImportValidationError with a snippet of the check output + first warning — gives the operator a concrete hint to fix the producer instead of silently growing dedup against activities that never landed. 
poetry run pytest -q → 109 passed, 1 skipped poetry run mypy → clean poetry run ruff check → clean --- broker_sync/sinks/wealthfolio.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/broker_sync/sinks/wealthfolio.py b/broker_sync/sinks/wealthfolio.py index 47881db..e968927 100644 --- a/broker_sync/sinks/wealthfolio.py +++ b/broker_sync/sinks/wealthfolio.py @@ -224,7 +224,24 @@ class WealthfolioSink: if isinstance(raw, dict) and "activities" in raw: got = raw["activities"] assert isinstance(got, list) - return got - if isinstance(raw, list): - return raw - return [] + elif isinstance(raw, list): + got = raw + else: + got = [] + # Silent-drop detection: if we sent N valid rows but got 0 back, something + # is silently rejecting them (usually a date-format or asset-resolution + # quirk that check() didn't catch). Raise so the pipeline records failure + # instead of marking the rows as synced when they never landed. + if valid_rows and not got: + # Also surface any per-row `errors` or `warnings` from the check step + # — those are often the best hint about why /import dropped them. + first_warn = next( + (r.get("warnings") for r in checked if isinstance(r, dict) and r.get("warnings")), + None, + ) + raise ImportValidationError( + f"Wealthfolio /import silently dropped all {len(valid_rows)} rows. " + f"First checked row: {checked[0] if checked else 'none'}. " + f"First warning (if any): {first_warn}" + ) + return got From 74b2179c83bfc233a14c8aa4e1a6166bf38e2a8d Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Fri, 17 Apr 2026 22:30:24 +0000 Subject: [PATCH 07/44] sinks: read summary.imported as truth for partial-persist detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The /import response returns activities=[input echo with errors annotated] — its length equals input size regardless of actual persistence. 
The summary{total,imported,skipped,duplicates} block is the authoritative signal. When imported Date: Fri, 17 Apr 2026 22:38:21 +0000 Subject: [PATCH 08/44] Add finance_mysql provider + CLI for historical backfill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit finance.position (171 rows, 2020-06-07 to 2025-12-19) is the only source of InvestEngine + Schwab trade history pre-dating the broker-sync project. This provider reads it once and pushes every row into the correct WF account (.L tickers → IE ISA, others → Schwab). Dedup: external_id = 'finance-mysql:position:' — idempotent on re-run. Auth: aiomysql as MySQL root (user-authorized) against the standalone mysql:8.4 in-cluster service. New CLI: broker-sync finance-mysql-import New tests: 5 unit tests covering route, symbol normalise, BUY/SELL detection. poetry run pytest -q → 114 passed, 1 skipped poetry run mypy → clean (aiomysql shielded with type: ignore) poetry run ruff check → clean --- broker_sync/cli.py | 65 +++++++++++ broker_sync/providers/finance_mysql.py | 144 +++++++++++++++++++++++++ broker_sync/sinks/wealthfolio.py | 14 ++- poetry.lock | 37 ++++++- pyproject.toml | 1 + tests/providers/test_finance_mysql.py | 66 ++++++++++++ 6 files changed, 318 insertions(+), 9 deletions(-) create mode 100644 broker_sync/providers/finance_mysql.py create mode 100644 tests/providers/test_finance_mysql.py diff --git a/broker_sync/cli.py b/broker_sync/cli.py index ea7d8c9..3b4ff22 100644 --- a/broker_sync/cli.py +++ b/broker_sync/cli.py @@ -230,6 +230,71 @@ def invest_engine( asyncio.run(_run()) +@app.command("finance-mysql-import") +def finance_mysql_import( + wf_base_url: str = typer.Option(..., envvar="WF_BASE_URL"), + wf_username: str = typer.Option(..., envvar="WF_USERNAME"), + wf_password: str = typer.Option(..., envvar="WF_PASSWORD"), + wf_session_path: str = typer.Option("/data/wealthfolio_session.json", + envvar="WF_SESSION_PATH"), + db_host: str = typer.Option(..., 
envvar="FINANCE_DB_HOST"), + db_port: int = typer.Option(3306, envvar="FINANCE_DB_PORT"), + db_user: str = typer.Option(..., envvar="FINANCE_DB_USER"), + db_password: str = typer.Option(..., envvar="FINANCE_DB_PASSWORD"), + db_name: str = typer.Option("finance", envvar="FINANCE_DB_NAME"), + data_dir: str = typer.Option("/data", envvar="BROKER_SYNC_DATA_DIR"), +) -> None: + """One-shot backfill: read the retired finance app's MySQL position table + and push every row into the correct Wealthfolio account (IE for .L + tickers, Schwab for US tickers). Idempotent via dedup.""" + from broker_sync.dedup import SyncRecordStore + from broker_sync.pipeline import sync_provider_to_wealthfolio + from broker_sync.providers.finance_mysql import ( + FinanceMySQLCreds, + FinanceMySQLProvider, + ) + from broker_sync.sinks.wealthfolio import WealthfolioSink + + _setup_logging() + data = Path(data_dir) + data.mkdir(parents=True, exist_ok=True) + + async def _run() -> None: + sink = WealthfolioSink( + base_url=wf_base_url, + username=wf_username, + password=wf_password, + session_path=wf_session_path, + ) + provider = FinanceMySQLProvider( + FinanceMySQLCreds( + host=db_host, + port=db_port, + user=db_user, + password=db_password, + database=db_name, + )) + dedup = SyncRecordStore(data / "sync.db") + try: + if not Path(wf_session_path).exists(): + await sink.login() + result = await sync_provider_to_wealthfolio( + provider=provider, + sink=sink, + dedup=dedup, + ) + finally: + await sink.close() + typer.echo(f"finance-mysql: fetched={result.fetched} " + f"new={result.new_after_dedup} " + f"imported={result.imported} " + f"failed={result.failed}") + if result.failed > 0: + sys.exit(1) + + asyncio.run(_run()) + + @app.command("imap-ingest") def imap_ingest( wf_base_url: str = typer.Option(..., envvar="WF_BASE_URL"), diff --git a/broker_sync/providers/finance_mysql.py b/broker_sync/providers/finance_mysql.py new file mode 100644 index 0000000..61eee7d --- /dev/null +++ 
b/broker_sync/providers/finance_mysql.py @@ -0,0 +1,144 @@ +"""Backfill-from-finance provider. + +The retired `finance` app's MySQL has a `position` table with 5+ years of +InvestEngine + Schwab trade history (2020 onwards) that the broker-sync +pipeline otherwise can't reconstruct (IE's emails only go back to when +Viktor started receiving them; Schwab emails are sparse). This provider +reads that table once and emits canonical Activities so a full-history +backfill into Wealthfolio is possible. + +Ticker routing to Wealthfolio accounts: + *.L (VUAG.L, VUSA.L, etc.) -> InvestEngine ISA (GBP) + everything else (META, *_US_EQ) -> Schwab (US workplace, USD) + +Deduplication: the finance.position PK (a giant numeric string) goes into +external_id verbatim, so re-runs are idempotent against the sync_record +store. +""" +from __future__ import annotations + +import logging +from collections.abc import AsyncIterator +from datetime import UTC, datetime +from decimal import Decimal +from typing import NamedTuple + +import aiomysql # type: ignore[import-untyped] + +from broker_sync.models import Account, AccountType, Activity, ActivityType + +log = logging.getLogger(__name__) + +IE_ACCOUNT_ID = "invest-engine-primary" +SCHWAB_ACCOUNT_ID = "schwab-workplace" + + +class FinanceMySQLCreds(NamedTuple): + host: str + port: int + user: str + password: str + database: str + + +def _route(ticker: str) -> tuple[str, AccountType, str]: + """Return (account_id, account_type, currency) for a raw ticker.""" + if ticker.endswith(".L"): + return IE_ACCOUNT_ID, AccountType.ISA, "GBP" + return SCHWAB_ACCOUNT_ID, AccountType.GIA, "USD" + + +def _normalise_symbol(ticker: str) -> str: + """Strip finance-app quirks so the output symbol matches T212/Wealthfolio.""" + # VUAG.L -> VUAG (LSE handled by Wealthfolio's exchange_mic resolution) + if ticker.endswith(".L"): + return ticker[:-2] + # FLME_US_EQ -> FLME (Trading212-style suffix leaked into the old finance DB) + if ticker.endswith("_US_EQ"): 
+ return ticker[:-6] + if ticker.endswith("_EQ"): + return ticker[:-3] + return ticker + + +def _row_to_activity(row: dict[str, object]) -> Activity: + ticker = str(row["ticker"]) + account_id, account_type, default_ccy = _route(ticker) + raw_qty = Decimal(str(row["num_shares"])) + activity_type = ActivityType.BUY if raw_qty > 0 else ActivityType.SELL + # buy_date from MySQL comes back as datetime (aiomysql converts) + dt = row["buy_date"] + if isinstance(dt, datetime): + date = dt if dt.tzinfo else dt.replace(tzinfo=UTC) + else: + date = datetime.fromisoformat(str(dt)).replace(tzinfo=UTC) + currency_raw = row.get("currency") + currency = str(currency_raw) if currency_raw else default_ccy + return Activity( + external_id=f"finance-mysql:position:{row['id']}", + account_id=account_id, + account_type=account_type, + date=date, + activity_type=activity_type, + symbol=_normalise_symbol(ticker), + quantity=abs(raw_qty), + unit_price=Decimal(str(row["buy_price"])), + currency=currency, + notes=f"finance-mysql:{ticker}", + ) + + +class FinanceMySQLProvider: + """Read-only backfill from the retired finance MySQL `position` table.""" + name = "finance-mysql" + + def __init__(self, creds: FinanceMySQLCreds) -> None: + self._creds = creds + + def accounts(self) -> list[Account]: + return [ + Account( + id=IE_ACCOUNT_ID, + name="InvestEngine ISA", + account_type=AccountType.ISA, + currency="GBP", + provider="invest-engine", + ), + Account( + id=SCHWAB_ACCOUNT_ID, + name="Schwab (US workplace)", + account_type=AccountType.GIA, + currency="USD", + provider="schwab", + ), + ] + + async def fetch( + self, + *, + since: datetime | None = None, + before: datetime | None = None, + ) -> AsyncIterator[Activity]: + conn = await aiomysql.connect( + host=self._creds.host, + port=self._creds.port, + user=self._creds.user, + password=self._creds.password, + db=self._creds.database, + autocommit=True, + ) + try: + async with conn.cursor(aiomysql.DictCursor) as cur: + await 
cur.execute("SELECT id, ticker, buy_price, num_shares, currency, buy_date, " + "account_id FROM position ORDER BY buy_date ASC") + rows = await cur.fetchall() + log.info("finance-mysql: %d position rows", len(rows)) + for row in rows: + activity = _row_to_activity(row) + if since is not None and activity.date < since: + continue + if before is not None and activity.date >= before: + continue + yield activity + finally: + conn.close() diff --git a/broker_sync/sinks/wealthfolio.py b/broker_sync/sinks/wealthfolio.py index e69cd73..4d73412 100644 --- a/broker_sync/sinks/wealthfolio.py +++ b/broker_sync/sinks/wealthfolio.py @@ -243,11 +243,9 @@ class WealthfolioSink: err_msg = summary.get("errorMessage") or "no errorMessage" skipped = int(summary.get("skipped", 0)) dupes = int(summary.get("duplicates", 0)) - raise ImportValidationError( - f"Wealthfolio /import persisted {imported_n}/{total_n} " - f"(skipped={skipped} duplicates={dupes}). " - f"errorMessage: {err_msg}" - ) + raise ImportValidationError(f"Wealthfolio /import persisted {imported_n}/{total_n} " + f"(skipped={skipped} duplicates={dupes}). " + f"errorMessage: {err_msg}") # Legacy silent-drop guard for no-summary responses. elif valid_rows and not got: first_warn = next( @@ -257,6 +255,6 @@ class WealthfolioSink: raise ImportValidationError( f"Wealthfolio /import silently dropped all {len(valid_rows)} rows. " f"First checked row: {checked[0] if checked else 'none'}. " - f"First warning: {first_warn}" - ) - return got + f"First warning: {first_warn}") + assert isinstance(got, list) + return [r for r in got if isinstance(r, dict)] diff --git a/poetry.lock b/poetry.lock index 73fc482..07fce53 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,5 +1,24 @@ # This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. +[[package]] +name = "aiomysql" +version = "0.3.2" +description = "MySQL driver for asyncio." 
+optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "aiomysql-0.3.2-py3-none-any.whl", hash = "sha256:c82c5ba04137d7afd5c693a258bea8ead2aad77101668044143a991e04632eb2"}, + {file = "aiomysql-0.3.2.tar.gz", hash = "sha256:72d15ef5cfc34c03468eb41e1b90adb9fd9347b0b589114bd23ead569a02ac1a"}, +] + +[package.dependencies] +PyMySQL = ">=1.0" + +[package.extras] +rsa = ["PyMySQL[rsa] (>=1.0)"] +sa = ["sqlalchemy (>=1.3,<1.4)"] + [[package]] name = "anyio" version = "4.13.0" @@ -459,6 +478,22 @@ files = [ [package.extras] windows-terminal = ["colorama (>=0.4.6)"] +[[package]] +name = "pymysql" +version = "1.1.2" +description = "Pure Python MySQL Driver" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "pymysql-1.1.2-py3-none-any.whl", hash = "sha256:e6b1d89711dd51f8f74b1631fe08f039e7d76cf67a42a323d3178f0f25762ed9"}, + {file = "pymysql-1.1.2.tar.gz", hash = "sha256:4961d3e165614ae65014e361811a724e2044ad3ea3739de9903ae7c21f539f03"}, +] + +[package.extras] +ed25519 = ["PyNaCl (>=1.4.0)"] +rsa = ["cryptography"] + [[package]] name = "pytest" version = "8.4.2" @@ -670,4 +705,4 @@ platformdirs = ">=3.5.1" [metadata] lock-version = "2.1" python-versions = ">=3.11,<3.13" -content-hash = "04a3e24fe45c75f975140aff6076af0a156772a1a8e82eba30ee2345ac1d8bd6" +content-hash = "dcc5b4eadd0a8df900e74674acf33215091dcb9bd0fffcefb03607dde2408a16" diff --git a/pyproject.toml b/pyproject.toml index 0a25a66..680f5ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,7 @@ beautifulsoup4 = "^4.12" python-dateutil = "^2.9" typer = "^0.12" click = "<8.2" # typer 0.12 uses make_metavar() without ctx; click 8.2 made ctx required +aiomysql = "^0.3.2" [tool.poetry.group.dev.dependencies] pytest = "^8.3" diff --git a/tests/providers/test_finance_mysql.py b/tests/providers/test_finance_mysql.py new file mode 100644 index 0000000..2887694 --- /dev/null +++ b/tests/providers/test_finance_mysql.py @@ -0,0 +1,66 @@ +from __future__ 
import annotations + +from datetime import UTC, datetime +from decimal import Decimal + +from broker_sync.models import AccountType, ActivityType +from broker_sync.providers.finance_mysql import _normalise_symbol, _route, _row_to_activity + + +def test_lse_ticker_routes_to_investengine() -> None: + acct, t, ccy = _route("VUAG.L") + assert acct == "invest-engine-primary" + assert t is AccountType.ISA + assert ccy == "GBP" + + +def test_us_ticker_routes_to_schwab() -> None: + assert _route("META") == ("schwab-workplace", AccountType.GIA, "USD") + assert _route("FLME_US_EQ") == ("schwab-workplace", AccountType.GIA, "USD") + + +def test_normalise_symbol() -> None: + assert _normalise_symbol("VUAG.L") == "VUAG" + assert _normalise_symbol("VUSA.L") == "VUSA" + assert _normalise_symbol("META") == "META" + assert _normalise_symbol("FLME_US_EQ") == "FLME" + assert _normalise_symbol("FOO_EQ") == "FOO" + + +def test_row_to_buy_activity() -> None: + row = { + "id": "123456", + "ticker": "VUAG.L", + "buy_price": 85.5, + "num_shares": 10.0, + "currency": "GBP", + "buy_date": datetime(2022, 3, 15, 10, 30), + "account_id": 1, + } + a = _row_to_activity(row) + assert a.external_id == "finance-mysql:position:123456" + assert a.account_id == "invest-engine-primary" + assert a.account_type is AccountType.ISA + assert a.activity_type is ActivityType.BUY + assert a.symbol == "VUAG" # .L stripped + assert a.quantity == Decimal("10.0") + assert a.unit_price == Decimal("85.5") + assert a.currency == "GBP" + assert a.date == datetime(2022, 3, 15, 10, 30, tzinfo=UTC) + + +def test_row_to_sell_when_qty_negative() -> None: + row = { + "id": "x", + "ticker": "META", + "buy_price": 450.0, + "num_shares": -2.5, # sell + "currency": "USD", + "buy_date": datetime(2024, 8, 5), + "account_id": 1, + } + a = _row_to_activity(row) + assert a.activity_type is ActivityType.SELL + assert a.quantity == Decimal("2.5") # absolute + assert a.account_id == "schwab-workplace" + assert a.symbol == "META" From 
c830856ba19b56a8ec62d3cdccb7921fa64f6568 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sat, 18 Apr 2026 12:02:49 +0000 Subject: [PATCH 09/44] =?UTF-8?q?imap:=20route=20IE=20BUYs=20to=20ISA=20fi?= =?UTF-8?q?rst-=C2=A320k=20/=20GIA=20overflow=20per=20UK=20tax=20year?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Context Viktor's InvestEngine account has both an ISA and a GIA wrapper. Trade confirmation emails (info@investengine.com) are identical between them — subject "Here's how your portfolio looks now", body shows "Client name: Viktor Barzin" with no portfolio/account type. That left the IMAP parser hardcoded to route every IE BUY to the ISA (invest-engine-primary), which produced a 2339-share over-count when 2023-24 GIA buys landed in the ISA during the 2026-04-18 reconciliation. Viktor's rule: from 6 April each tax year, BUYs fill ISA up to the £20,000 cap, then overflow to GIA. This commit codifies that rule in a standalone batch splitter and applies it at the ImapProvider boundary. Also picks up a silent-drop bug surfaced during the same reconciliation: WF's /import (unlike /import/check) rejects naive datetimes with "Invalid date". The sink now coerces tzinfo=UTC defensively so every provider gets the same guarantee. ## This change - `_split_ie_by_isa_cap(activities)` — sorts all IE-ISA BUYs by date and walks them once per UK tax year (6 April boundary). A BUY whose running tax-year total BEFORE it is strictly below £20k stays on the ISA; otherwise it flips to a new `invest-engine-gia` account_id. No fractional splits — boundary activities go whole to whichever bucket their pre-running-total dictates. Non-IE and non-BUY activities pass through unchanged. - `ImapProvider.accounts()` gains an `invest-engine-gia` Account so the pipeline's `_ensure_accounts` can resolve both. 
- `ImapProvider.fetch()` calls the splitter on the full batch before applying the `since`/`before` date filter — batch-level sort guarantees consistent routing regardless of the order IMAP returns messages. - `WealthfolioSink._activity_to_import_row` coerces naive datetimes to UTC so the row passes WF /import validation. ## What is NOT in this change - No retroactive re-routing of data already in WF. Historical finance-mysql rows (all lumped to `invest-engine-primary` or `invest-engine-gia` by the existing heuristic) keep their current account assignment. If a past tax-year was routed "wrong" under the new rule, that's corrected manually via the WF API, not here. - No change to the Schwab or trading212 paths. ## Verification ### Automated \`\`\` $ poetry run pytest tests/providers/test_imap.py -v tests/providers/test_imap.py::test_uk_tax_year_start_before_april_6_rolls_back PASSED tests/providers/test_imap.py::test_single_tax_year_under_cap_stays_isa PASSED tests/providers/test_imap.py::test_overflow_past_cap_flips_to_gia PASSED tests/providers/test_imap.py::test_tax_year_boundary_resets_cap PASSED tests/providers/test_imap.py::test_out_of_order_activities_sorted_before_cap_applied PASSED tests/providers/test_imap.py::test_non_ie_activities_passed_through_unchanged PASSED 6 passed in 0.36s $ poetry run pytest -q --ignore=tests/test_cli.py 116 passed, 1 skipped in 2.76s $ poetry run ruff check broker_sync/providers/imap.py broker_sync/sinks/wealthfolio.py All checks passed! $ poetry run mypy broker_sync/providers/imap.py broker_sync/sinks/wealthfolio.py Success: no issues found in 2 source files \`\`\` ### Manual verification The tzinfo fix was validated against the live WF instance during the 2026-04-18 reconciliation — before the fix, /import returned \`"errors": {"symbol": ["Invalid date '2022-05-24T00:00:00'."]}\` for every IMAP activity; after, the same payload imported cleanly. 
The splitter was not exercised against live IMAP data because Viktor's mailbox only has Apr 2022 → Feb 2024 emails, all inside finance.position's existing coverage. Running IMAP ingest with \`since=2024-04-06\` yields fetched=0. The unit tests cover the boundary arithmetic; a live run will happen when newer emails are parsed (or when finance coverage is re-scoped). ## Reproduce locally 1. \`poetry install\` 2. \`poetry run pytest tests/providers/test_imap.py\` 3. Expected: 6 passed, 0 failed. Co-Authored-By: Claude Opus 4.7 (1M context) --- broker_sync/providers/imap.py | 72 ++++++++++++++++++++-- broker_sync/sinks/wealthfolio.py | 6 +- tests/providers/test_imap.py | 100 +++++++++++++++++++++++++++++++ 3 files changed, 173 insertions(+), 5 deletions(-) create mode 100644 tests/providers/test_imap.py diff --git a/broker_sync/providers/imap.py b/broker_sync/providers/imap.py index de46aa9..e935bab 100644 --- a/broker_sync/providers/imap.py +++ b/broker_sync/providers/imap.py @@ -19,14 +19,66 @@ import logging import re import ssl from collections.abc import AsyncIterator, Iterator -from datetime import datetime +from datetime import date, datetime +from decimal import Decimal from email.message import Message from typing import NamedTuple -from broker_sync.models import Account, AccountType, Activity +from broker_sync.models import Account, AccountType, Activity, ActivityType from broker_sync.providers.parsers import invest_engine as ie_parser from broker_sync.providers.parsers.schwab import parse_schwab_email +_IE_ISA_ACCOUNT_ID = "invest-engine-primary" +_IE_GIA_ACCOUNT_ID = "invest-engine-gia" +_ISA_ANNUAL_CAP = Decimal("20000") +_UK_TAX_YEAR_START = (4, 6) # (month, day) — UK tax year starts 6 April + + +def _uk_tax_year_start(d: datetime) -> date: + """Return the start date (6 April of year N) of the UK tax year containing `d`.""" + month, day = _UK_TAX_YEAR_START + cutoff = date(d.year, month, day) + return cutoff if d.date() >= cutoff else date(d.year - 1, 
month, day) + + +def _split_ie_by_isa_cap( + activities: list[Activity], + *, + isa_cap: Decimal = _ISA_ANNUAL_CAP, +) -> list[Activity]: + """Re-route IE BUYs: first `isa_cap` GBP of each UK tax year → ISA, rest → GIA. + + Viktor's IE account has both an ISA and a GIA wrapper, and his trade + confirmation emails don't indicate which one a given buy hit. Empirically, + he fills the ISA allowance first each tax year (6 April) and any excess + lands in GIA. This function partitions an already-parsed batch of Activity + objects by that rule. + + Rule for boundary buys: a BUY is assigned to ISA iff the running tax-year + total BEFORE it is still strictly below the cap; otherwise GIA. Whole- + activity assignment — no fractional splits. + + Non-IE activities and non-BUYs are passed through unchanged. + """ + ie_buys = [ + a for a in activities + if a.account_id == _IE_ISA_ACCOUNT_ID and a.activity_type is ActivityType.BUY + ] + ie_buys.sort(key=lambda a: a.date) + cumulative: dict[date, Decimal] = {} + for a in ie_buys: + ty = _uk_tax_year_start(a.date) + running = cumulative.get(ty, Decimal(0)) + trade_value = (a.quantity or Decimal(0)) * (a.unit_price or Decimal(0)) + if running < isa_cap: + a.account_id = _IE_ISA_ACCOUNT_ID + a.account_type = AccountType.ISA + else: + a.account_id = _IE_GIA_ACCOUNT_ID + a.account_type = AccountType.GIA + cumulative[ty] = running + trade_value + return activities + log = logging.getLogger(__name__) _IE_SENDERS = {"noreply@investengine.com", "hello@investengine.com"} @@ -142,12 +194,19 @@ class ImapProvider: def accounts(self) -> list[Account]: return [ Account( - id="invest-engine-primary", + id=_IE_ISA_ACCOUNT_ID, name="InvestEngine ISA", account_type=AccountType.ISA, currency="GBP", provider="invest-engine", ), + Account( + id=_IE_GIA_ACCOUNT_ID, + name="InvestEngine GIA", + account_type=AccountType.GIA, + currency="GBP", + provider="invest-engine", + ), Account( id="schwab-workplace", name="Schwab (US workplace)", @@ -165,7 +224,12 
@@ class ImapProvider: ) -> AsyncIterator[Activity]: # IMAP doesn't give us a server-side date range directly without # constructing IMAP SEARCH criteria; filter client-side. - for a in fetch_activities(self._creds): + all_activities = fetch_activities(self._creds) + # Apply ISA/GIA £20k-cap routing in one batch-level pass so each UK tax + # year's cumulative total is computed consistently regardless of email + # order on the server. + routed = _split_ie_by_isa_cap(all_activities) + for a in routed: if since is not None and a.date < since: continue if before is not None and a.date >= before: diff --git a/broker_sync/sinks/wealthfolio.py b/broker_sync/sinks/wealthfolio.py index 4d73412..efbd50c 100644 --- a/broker_sync/sinks/wealthfolio.py +++ b/broker_sync/sinks/wealthfolio.py @@ -2,6 +2,7 @@ from __future__ import annotations import json from collections.abc import Iterable +from datetime import UTC from pathlib import Path from typing import Any @@ -164,8 +165,11 @@ class WealthfolioSink: @staticmethod def _activity_to_import_row(a: Activity) -> dict[str, Any]: """Match Wealthfolio's ActivityImport struct (camelCase JSON).""" + # WF /import rejects naive datetimes with "Invalid date" (even though + # /import/check accepts them) — coerce to UTC if tzinfo is missing. 
def _buy(on: datetime, qty: str, price: str) -> Activity:
    """Build a GBP ``VUAG`` BUY on the IE ISA account, dated ``on``.

    ``qty`` and ``price`` are decimal strings; they are embedded verbatim in
    the external id and parsed into ``Decimal`` for the activity itself.
    """
    external_id = f"invest-engine:{on.isoformat()}|{qty}|{price}"
    units = Decimal(qty)
    price_per_unit = Decimal(price)
    return Activity(
        external_id=external_id,
        account_id=_IE_ISA_ACCOUNT_ID,
        account_type=AccountType.ISA,
        date=on,
        activity_type=ActivityType.BUY,
        currency="GBP",
        symbol="VUAG",
        quantity=units,
        unit_price=price_per_unit,
    )
def test_tax_year_boundary_resets_cap() -> None:
    """A new tax year (from 6 April) starts again with an empty ISA allowance."""
    # 2023-24 tax year: one £20,000 buy goes to the ISA (nothing spent before it) ...
    fills_allowance = _buy(datetime(2023, 5, 1, tzinfo=UTC), "400", "50")
    # ... so the next buy in that same tax year overflows into the GIA.
    same_year_overflow = _buy(datetime(2024, 1, 1, tzinfo=UTC), "100", "50")
    # 2024-25 tax year starts 2024-04-06: running total is back to zero → ISA.
    next_year_first = _buy(datetime(2024, 5, 1, tzinfo=UTC), "100", "50")

    routed = _split_ie_by_isa_cap(
        [fills_allowance, same_year_overflow, next_year_first])

    assert [a.account_id for a in routed[:3]] == [
        _IE_ISA_ACCOUNT_ID,
        _IE_GIA_ACCOUNT_ID,
        _IE_ISA_ACCOUNT_ID,
    ]
unit_price=Decimal("500"), + ) + routed = _split_ie_by_isa_cap([schwab_act]) + assert routed[0].account_id == "schwab-workplace" + assert routed[0].account_type is AccountType.GIA From 832732a419e9c15d34cb9e8d79e0cc913ce38a47 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sat, 18 Apr 2026 14:09:04 +0000 Subject: [PATCH 10/44] fidelity-planviewer: scaffold provider + CLI (seed + stub ingest) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Context UK workplace pension at planviewer.fidelity.co.uk has no public API; the SPA calls a private JSON backend at prd.wiciam.fidelity.co.uk/cvmfe/api/*. Viktor confirmed in DevTools that an OPTIONS preflight lists auth headers (ch, fid, rid, sid, tbid, theosreferer, ua). Full reverse-engineering of the endpoint paths is pending Viktor's POST cURL paste for transactions + holdings views. Until those endpoints are captured, ship the scaffold: provider module, CLI commands, tests, docs. This unblocks installing Playwright in the image and lets Viktor run the one-off seed command on his laptop ahead of the data integration. ## This change - broker_sync/providers/fidelity_planviewer.py - FidelityCreds namedtuple (storage_state_path, plan_id). - FidelitySessionError (401 → re-seed), FidelityProviderConfigError. - FidelityPlanViewerProvider: .accounts() returns a single WORKPLACE_PENSION account, .fetch() raises until endpoints are wired. - broker_sync/cli.py - fidelity-seed: launches headed Chromium so Viktor can log in and tick "Remember device", then dumps storage_state.json. - fidelity-ingest: stub matching the invest-engine / trading212 CLI shape; reads storage_state + plan_id, pipes through the shared pipeline. - tests/providers/test_fidelity_planviewer.py - Asserts the single-account shape + the loud-failure guard. - docs/providers/fidelity-planviewer.md - Architecture diagram, one-time seed procedure, backfill + monthly commands, alert runbook. 
- pyproject.toml - playwright ^1.47 as a first-class dep (used only by fidelity-seed and later by the session-refresh step in fidelity-ingest). ## What is NOT in this change - Endpoint wiring in provider.fetch() — blocked on DevTools POST cURL. - Infra CronJob + Vault secret + Prometheus alert — lands once the first manual backfill succeeds and we know the Chromium image size is fine. - Dockerfile Chromium install — same trigger. ## Verification ### Automated $ poetry run pytest tests/providers/test_fidelity_planviewer.py -v 2 passed in 0.08s $ poetry run pytest -q 122 passed, 1 skipped in 1.07s $ poetry run mypy broker_sync/providers/fidelity_planviewer.py broker_sync/cli.py Success: no issues found in 2 source files $ poetry run ruff check broker_sync/providers/fidelity_planviewer.py broker_sync/cli.py tests/providers/test_fidelity_planviewer.py All checks passed! ### Manual (Viktor, later) 1. poetry install && poetry run playwright install chromium 2. poetry run broker-sync fidelity-seed --out /tmp/state.json 3. Chromium opens → log in → tick "Remember device" → press Enter 4. 
@app.command("fidelity-seed")
def fidelity_seed(
    out: str = typer.Option(
        "fidelity_storage_state.json",
        help="Where to write the storage_state JSON (stage it to Vault afterwards)",
    ),
    url: str = typer.Option(
        "https://pv.planviewer.fidelity.co.uk/",
        help="PlanViewer SPA URL — defaults to the production UK landing",
    ),
) -> None:
    """One-off: launch a headed Chromium so Viktor can log into PlanViewer and
    capture a long-lived storage_state (cookies + localStorage) for the monthly
    cron.

    Expected flow:
      1. Chromium opens on the PlanViewer login page.
      2. Viktor enters username, password, memorable word, MFA code.
      3. Viktor ticks "Remember device" / "Trust this browser" if offered.
      4. Viktor waits until the dashboard loads, then presses Enter in the terminal.
      5. Script dumps storage_state.json and exits.
      6. Viktor runs ``vault kv patch secret/broker-sync fidelity_storage_state=@...``.

    The storage_state file contains live session cookies, so it is written
    with owner-only (0600) permissions rather than whatever the umask allows.
    The browser is closed even when the prompt is interrupted (Ctrl-C / EOF);
    in that case nothing is written to ``out``.

    Exits with status 2 when Playwright is not installed.
    """
    # Function-scope imports, in keeping with the lazy Playwright import below.
    import json
    import os

    _setup_logging()
    try:
        from playwright.sync_api import sync_playwright
    except ImportError as e:
        typer.echo(
            "Playwright is not installed — run `poetry install` first.", err=True)
        raise typer.Exit(code=2) from e

    typer.echo(f"Opening {url} in a headed browser — log in, tick "
               "'Remember device' if offered, then press Enter here.")
    with sync_playwright() as pw:
        browser = pw.chromium.launch(headless=False)
        try:
            context = browser.new_context()
            page = context.new_page()
            page.goto(url)
            input("Press Enter once you're fully logged in and the dashboard is visible… ")
            # No ``path=``: take the state as a dict so this function controls
            # how (and with which permissions) the file is created.
            state = context.storage_state()
        finally:
            browser.close()

    fd = os.open(out, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w", encoding="utf-8") as fh:
        # The mode passed to os.open only applies to a newly created file;
        # tighten a pre-existing one before any secret is written into it.
        os.chmod(out, 0o600)
        json.dump(state, fh)
    typer.echo(f"Wrote {out} — stage it to Vault:")
    typer.echo(f" vault kv patch secret/broker-sync fidelity_storage_state=@{out}")
datetime.now(UTC) - timedelta(days=60) + elif mode == "backfill": + since = None + else: + typer.echo(f"Unknown mode: {mode!r}. Use 'steady' or 'backfill'.", err=True) + sys.exit(2) + + async def _run() -> None: + sink = WealthfolioSink( + base_url=wf_base_url, + username=wf_username, + password=wf_password, + session_path=wf_session_path, + ) + provider = FidelityPlanViewerProvider(FidelityCreds( + storage_state_path=storage_state_path, + plan_id=plan_id, + )) + dedup = SyncRecordStore(Path(data_dir) / "sync.db") + try: + if not Path(wf_session_path).exists(): + await sink.login() + result = await sync_provider_to_wealthfolio( + provider=provider, sink=sink, dedup=dedup, since=since, + ) + finally: + await sink.close() + typer.echo(f"fidelity-ingest: fetched={result.fetched} " + f"new={result.new_after_dedup} " + f"imported={result.imported} " + f"failed={result.failed}") + if result.failed > 0: + sys.exit(1) + + asyncio.run(_run()) + + def _setup_logging() -> None: logging.basicConfig( level=logging.INFO, diff --git a/broker_sync/providers/fidelity_planviewer.py b/broker_sync/providers/fidelity_planviewer.py new file mode 100644 index 0000000..6031bc2 --- /dev/null +++ b/broker_sync/providers/fidelity_planviewer.py @@ -0,0 +1,128 @@ +"""Fidelity UK PlanViewer provider — workplace pension backfill + monthly sync. + +PlanViewer has no public individual-member API; Fidelity International's +developer portal only catalogues B2B scheme/HR endpoints. The SPA (at +``pv.planviewer.fidelity.co.uk``) does call a private JSON backend at +``prd.wiciam.fidelity.co.uk/cvmfe/api/*`` — we reverse-engineer that and feed +it through a Playwright-maintained session. + +## Session lifecycle + +1. **One-off seed** (``broker-sync fidelity-seed``): Viktor runs a headed + Chromium, logs in (password + memorable word + MFA), clicks "Remember + device". Playwright dumps the resulting ``storage_state.json`` (cookies + + localStorage) which we stash in Vault. + +2. 
class FidelityCreds(NamedTuple):
    """Credentials + session state required to hit the PlanViewer backend."""
    # Filesystem path to the Playwright ``storage_state.json`` (cookies +
    # localStorage) captured by ``broker-sync fidelity-seed``.
    storage_state_path: str
    # Identifier of the pension plan to sync; the CLI fills it from
    # ``FIDELITY_PLAN_ID``.
    plan_id: str
    # NOTE(review): presumably selects headless vs headed Chromium for the
    # session-refresh step — nothing in this module reads it yet; confirm once
    # ``fetch()`` is wired up.
    headless: bool = True
class FidelityPlanViewerProvider:
    """Read-only provider for the Fidelity UK PlanViewer workplace pension.

    Exposes the two members the sync pipeline uses on a provider:

    - ``accounts()`` lists the single workplace-pension account that
      activities are written into.
    - ``fetch(since=..., before=...)`` is an async generator of canonical
      ``Activity`` objects. It is a stub for now and raises on first
      iteration.
    """
    name = "fidelity-planviewer"

    def __init__(self, creds: FidelityCreds) -> None:
        """Keep the credentials for later use; no I/O happens here."""
        self._creds = creds

    def accounts(self) -> list[Account]:
        """Return the one account this provider advertises."""
        pension = Account(
            id=ACCOUNT_ID,
            name="Fidelity UK Pension",
            account_type=AccountType.WORKPLACE_PENSION,
            currency=_CCY,
            provider=self.name,
        )
        return [pension]

    async def fetch(
        self,
        *,
        since: datetime | None = None,
        before: datetime | None = None,
    ) -> AsyncIterator[Activity]:
        """Yield Activity records — not implemented yet.

        The PlanViewer endpoint shapes have not been captured, so this always
        raises ``FidelityProviderConfigError``. Because the function is an
        async generator, the error surfaces when the caller first iterates,
        not when ``fetch()`` is called. ``since`` and ``before`` are accepted
        but currently unused.
        """
        # Fail loudly with an actionable message instead of letting a
        # scheduled run "succeed" while importing nothing.
        not_enabled = (
            "Fidelity ingest not yet enabled — PlanViewer endpoint paths have "
            "not been captured. Paste the POST cURLs from DevTools for the "
            "transactions + holdings views and re-apply the provider update."
        )
        raise FidelityProviderConfigError(not_enabled)
        # Never executed: the bare ``yield`` is only here so Python compiles
        # this function as an async generator, matching the annotated return
        # type once the raise above is removed.
        if False:  # pragma: no cover
            yield
log in to PlanViewer, tick "Remember device", press Enter + +# stage to Vault +vault kv patch secret/broker-sync \ + fidelity_storage_state=@/tmp/fidelity_storage_state.json \ + fidelity_plan_id= + +rm /tmp/fidelity_storage_state.json # don't leave credentials lying around +``` + +Re-seed when the monthly CronJob fails with `FidelitySessionError` (expect +every 30-90 days, depending on how long Fidelity honours the remember-device +cookie). + +## One-time backfill + +```bash +kubectl -n broker-sync create job fidelity-backfill \ + --from=cronjob/broker-sync-fidelity +kubectl -n broker-sync logs -f job/fidelity-backfill +# expect: fidelity-ingest: fetched=N new=N imported=N failed=0 +``` + +## Monthly cron + +- Schedule: `0 3 5 * *` (3am UTC on the 5th of each month — after mid-month payroll settles in Viktor's scheme) +- CronJob: `broker-sync-fidelity` in namespace `broker-sync` +- Resource: small, ≤512 MiB memory (Chromium for ~2 min, then idle) +- Alert: `BrokerSyncFidelityFailed` fires on 2 consecutive failures + +## Runbook — `BrokerSyncFidelityFailed` + +1. Check pod logs: `kubectl -n broker-sync logs job/broker-sync-fidelity-`. +2. If the error is `FidelitySessionError`: session expired, re-run the seed on + Viktor's laptop (see above). +3. If the error is a 404 / 5xx from `prd.wiciam.fidelity.co.uk`: likely an API + path change. Check DevTools for the new endpoint, update the provider, ship + a new image. +4. If Playwright can't launch Chromium: check that the image still has Chromium + installed (`playwright install chromium` at build time). + +## Data model notes + +- **Salary sacrifice scheme**: all employee + employer contributions are + pre-tax from gross salary. No HMRC basic-rate relief line. +- Emits two `DEPOSIT` per month (employee, employer) with `comment` carrying + the source tag `fidelity::` for audit. +- Emits one `BUY` per fund unit purchase, `symbol` = Fidelity fund code / ISIN. 
+ Units × unit price should reconcile to the cash deposited ±pennies. + +## Not yet implemented + +- Endpoint paths: waiting on Viktor's DevTools POST cURL for transactions + + holdings views. Until pasted, `fidelity-ingest` raises + `FidelityProviderConfigError` to fail loudly. +- Infra: CronJob + Vault secret wiring + Prometheus alert in + `infra/stacks/broker-sync/main.tf` — pending first successful manual run. diff --git a/poetry.lock b/poetry.lock index 07fce53..f4abb62 100644 --- a/poetry.lock +++ b/poetry.lock @@ -101,6 +101,79 @@ files = [ ] markers = {main = "platform_system == \"Windows\"", dev = "sys_platform == \"win32\""} +[[package]] +name = "greenlet" +version = "3.4.0" +description = "Lightweight in-process concurrent programming" +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "greenlet-3.4.0-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:d18eae9a7fb0f499efcd146b8c9750a2e1f6e0e93b5a382b3481875354a430e6"}, + {file = "greenlet-3.4.0-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:636d2f95c309e35f650e421c23297d5011716be15d966e6328b367c9fc513a82"}, + {file = "greenlet-3.4.0-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:234582c20af9742583c3b2ddfbdbb58a756cfff803763ffaae1ac7990a9fac31"}, + {file = "greenlet-3.4.0-cp310-cp310-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ac6a5f618be581e1e0713aecec8e54093c235e5fa17d6d8eb7ffc487e2300508"}, + {file = "greenlet-3.4.0-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:523677e69cd4711b5a014e37bc1fb3a29947c3e3a5bb6a527e1cc50312e5a398"}, + {file = "greenlet-3.4.0-cp310-cp310-manylinux_2_39_riscv64.whl", hash = "sha256:d336d46878e486de7d9458653c722875547ac8d36a1cff9ffaf4a74a3c1f62eb"}, + {file = "greenlet-3.4.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b45e45fe47a19051a396abb22e19e7836a59ee6c5a90f3be427343c37908d65b"}, + {file = 
"greenlet-3.4.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5434271357be07f3ad0936c312645853b7e689e679e29310e2de09a9ea6c3adf"}, + {file = "greenlet-3.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:a19093fbad824ed7c0f355b5ff4214bffda5f1a7f35f29b31fcaa240cc0135ab"}, + {file = "greenlet-3.4.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:805bebb4945094acbab757d34d6e1098be6de8966009ab9ca54f06ff492def58"}, + {file = "greenlet-3.4.0-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:439fc2f12b9b512d9dfa681c5afe5f6b3232c708d13e6f02c845e0d9f4c2d8c6"}, + {file = "greenlet-3.4.0-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a70ed1cb0295bee1df57b63bf7f46b4e56a5c93709eea769c1fec1bb23a95875"}, + {file = "greenlet-3.4.0-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8c5696c42e6bb5cfb7c6ff4453789081c66b9b91f061e5e9367fa15792644e76"}, + {file = "greenlet-3.4.0-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c660bce1940a1acae5f51f0a064f1bc785d07ea16efcb4bc708090afc4d69e83"}, + {file = "greenlet-3.4.0-cp311-cp311-manylinux_2_39_riscv64.whl", hash = "sha256:89995ce5ddcd2896d89615116dd39b9703bfa0c07b583b85b89bf1b5d6eddf81"}, + {file = "greenlet-3.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ee407d4d1ca9dc632265aee1c8732c4a2d60adff848057cdebfe5fe94eb2c8a2"}, + {file = "greenlet-3.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:956215d5e355fffa7c021d168728321fd4d31fd730ac609b1653b450f6a4bc71"}, + {file = "greenlet-3.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:5cb614ace7c27571270354e9c9f696554d073f8aa9319079dcba466bbdead711"}, + {file = "greenlet-3.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:04403ac74fe295a361f650818de93be11b5038a78f49ccfb64d3b1be8fbf1267"}, + {file = "greenlet-3.4.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:1a54a921561dd9518d31d2d3db4d7f80e589083063ab4d3e2e950756ef809e1a"}, + {file = 
"greenlet-3.4.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:16dec271460a9a2b154e3b1c2fa1050ce6280878430320e85e08c166772e3f97"}, + {file = "greenlet-3.4.0-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:90036ce224ed6fe75508c1907a77e4540176dcf0744473627785dd519c6f9996"}, + {file = "greenlet-3.4.0-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6f0def07ec9a71d72315cf26c061aceee53b306c36ed38c35caba952ea1b319d"}, + {file = "greenlet-3.4.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a1c4f6b453006efb8310affb2d132832e9bbb4fc01ce6df6b70d810d38f1f6dc"}, + {file = "greenlet-3.4.0-cp312-cp312-manylinux_2_39_riscv64.whl", hash = "sha256:0e1254cf0cbaa17b04320c3a78575f29f3c161ef38f59c977108f19ffddaf077"}, + {file = "greenlet-3.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9b2d9a138ffa0e306d0e2b72976d2fb10b97e690d40ab36a472acaab0838e2de"}, + {file = "greenlet-3.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8424683caf46eb0eb6f626cb95e008e8cc30d0cb675bdfa48200925c79b38a08"}, + {file = "greenlet-3.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:a0a53fb071531d003b075c444014ff8f8b1a9898d36bb88abd9ac7b3524648a2"}, + {file = "greenlet-3.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:f38b81880ba28f232f1f675893a39cf7b6db25b31cc0a09bb50787ecf957e85e"}, + {file = "greenlet-3.4.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:43748988b097f9c6f09364f260741aa73c80747f63389824435c7a50bfdfd5c1"}, + {file = "greenlet-3.4.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5566e4e2cd7a880e8c27618e3eab20f3494452d12fd5129edef7b2f7aa9a36d1"}, + {file = "greenlet-3.4.0-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1054c5a3c78e2ab599d452f23f7adafef55062a783a8e241d24f3b633ba6ff82"}, + {file = "greenlet-3.4.0-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:98eedd1803353daf1cd9ef23eef23eda5a4d22f99b1f998d273a8b78b70dd47f"}, + {file = "greenlet-3.4.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f82cb6cddc27dd81c96b1506f4aa7def15070c3b2a67d4e46fd19016aacce6cf"}, + {file = "greenlet-3.4.0-cp313-cp313-manylinux_2_39_riscv64.whl", hash = "sha256:b7857e2202aae67bc5725e0c1f6403c20a8ff46094ece015e7d474f5f7020b55"}, + {file = "greenlet-3.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:227a46251ecba4ff46ae742bc5ce95c91d5aceb4b02f885487aff269c127a729"}, + {file = "greenlet-3.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5b99e87be7eba788dd5b75ba1cde5639edffdec5f91fe0d734a249535ec3408c"}, + {file = "greenlet-3.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:849f8bc17acd6295fcb5de8e46d55cc0e52381c56eaf50a2afd258e97bc65940"}, + {file = "greenlet-3.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:9390ad88b652b1903814eaabd629ca184db15e0eeb6fe8a390bbf8b9106ae15a"}, + {file = "greenlet-3.4.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:10a07aca6babdd18c16a3f4f8880acfffc2b88dfe431ad6aa5f5740759d7d75e"}, + {file = "greenlet-3.4.0-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:076e21040b3a917d3ce4ad68fb5c3c6b32f1405616c4a57aa83120979649bd3d"}, + {file = "greenlet-3.4.0-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e82689eea4a237e530bb5cb41b180ef81fa2160e1f89422a67be7d90da67f615"}, + {file = "greenlet-3.4.0-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:06c2d3b89e0c62ba50bd7adf491b14f39da9e7e701647cb7b9ff4c99bee04b19"}, + {file = "greenlet-3.4.0-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4df3b0b2289ec686d3c821a5fee44259c05cfe824dd5e6e12c8e5f5df23085cf"}, + {file = "greenlet-3.4.0-cp314-cp314-manylinux_2_39_riscv64.whl", hash = "sha256:070b8bac2ff3b4d9e0ff36a0d19e42103331d9737e8504747cd1e659f76297bd"}, + {file = 
"greenlet-3.4.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8bff29d586ea415688f4cec96a591fcc3bf762d046a796cdadc1fdb6e7f2d5bf"}, + {file = "greenlet-3.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8a569c2fb840c53c13a2b8967c63621fafbd1a0e015b9c82f408c33d626a2fda"}, + {file = "greenlet-3.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:207ba5b97ea8b0b60eb43ffcacf26969dd83726095161d676aac03ff913ee50d"}, + {file = "greenlet-3.4.0-cp314-cp314-win_arm64.whl", hash = "sha256:f8296d4e2b92af34ebde81085a01690f26a51eb9ac09a0fcadb331eb36dbc802"}, + {file = "greenlet-3.4.0-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:d70012e51df2dbbccfaf63a40aaf9b40c8bed37c3e3a38751c926301ce538ece"}, + {file = "greenlet-3.4.0-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a58bec0751f43068cd40cff31bb3ca02ad6000b3a51ca81367af4eb5abc480c8"}, + {file = "greenlet-3.4.0-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:05fa0803561028f4b2e3b490ee41216a842eaee11aed004cc343a996d9523aa2"}, + {file = "greenlet-3.4.0-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c4cd56a9eb7a6444edbc19062f7b6fbc8f287c663b946e3171d899693b1c19fa"}, + {file = "greenlet-3.4.0-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e60d38719cb80b3ab5e85f9f1aed4960acfde09868af6762ccb27b260d68f4ed"}, + {file = "greenlet-3.4.0-cp314-cp314t-manylinux_2_39_riscv64.whl", hash = "sha256:1f85f204c4d54134ae850d401fa435c89cd667d5ce9dc567571776b45941af72"}, + {file = "greenlet-3.4.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7f50c804733b43eded05ae694691c9aa68bca7d0a867d67d4a3f514742a2d53f"}, + {file = "greenlet-3.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:2d4f0635dc4aa638cda4b2f5a07ae9a2cff9280327b581a3fcb6f317b4fbc38a"}, + {file = "greenlet-3.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:1a4a48f24681300c640f143ba7c404270e1ebbbcf34331d7104a4ff40f8ea705"}, + {file = 
"greenlet-3.4.0.tar.gz", hash = "sha256:f50a96b64dafd6169e595a5c56c9146ef80333e67d4476a65a9c55f400fc22ff"}, +] + +[package.extras] +docs = ["Sphinx", "furo"] +test = ["objgraph", "psutil", "setuptools"] + [[package]] name = "h11" version = "0.16.0" @@ -447,6 +520,28 @@ files = [ {file = "platformdirs-4.9.6.tar.gz", hash = "sha256:3bfa75b0ad0db84096ae777218481852c0ebc6c727b3168c1b9e0118e458cf0a"}, ] +[[package]] +name = "playwright" +version = "1.58.0" +description = "A high-level API to automate web browsers" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "playwright-1.58.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:96e3204aac292ee639edbfdef6298b4be2ea0a55a16b7068df91adac077cc606"}, + {file = "playwright-1.58.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:70c763694739d28df71ed578b9c8202bb83e8fe8fb9268c04dd13afe36301f71"}, + {file = "playwright-1.58.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:185e0132578733d02802dfddfbbc35f42be23a45ff49ccae5081f25952238117"}, + {file = "playwright-1.58.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:c95568ba1eda83812598c1dc9be60b4406dffd60b149bc1536180ad108723d6b"}, + {file = "playwright-1.58.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f9999948f1ab541d98812de25e3a8c410776aa516d948807140aff797b4bffa"}, + {file = "playwright-1.58.0-py3-none-win32.whl", hash = "sha256:1e03be090e75a0fabbdaeab65ce17c308c425d879fa48bb1d7986f96bfad0b99"}, + {file = "playwright-1.58.0-py3-none-win_amd64.whl", hash = "sha256:a2bf639d0ce33b3ba38de777e08697b0d8f3dc07ab6802e4ac53fb65e3907af8"}, + {file = "playwright-1.58.0-py3-none-win_arm64.whl", hash = "sha256:32ffe5c303901a13a0ecab91d1c3f74baf73b84f4bedbb6b935f5bc11cc98e1b"}, +] + +[package.dependencies] +greenlet = ">=3.1.1,<4.0.0" +pyee = ">=13,<14" + [[package]] name = "pluggy" version = "1.6.0" @@ -463,6 +558,24 @@ files = [ dev = ["pre-commit", "tox"] testing = ["coverage", "pytest", "pytest-benchmark"] 
+[[package]] +name = "pyee" +version = "13.0.1" +description = "A rough port of Node.js's EventEmitter to Python with a few tricks of its own" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "pyee-13.0.1-py3-none-any.whl", hash = "sha256:af2f8fede4171ef667dfded53f96e2ed0d6e6bd7ee3bb46437f77e3b57689228"}, + {file = "pyee-13.0.1.tar.gz", hash = "sha256:0b931f7c14535667ed4c7e0d531716368715e860b988770fc7eb8578d1f67fc8"}, +] + +[package.dependencies] +typing-extensions = "*" + +[package.extras] +dev = ["black", "build", "flake8", "flake8-black", "isort", "jupyter-console", "mkdocs", "mkdocs-include-markdown-plugin", "mkdocstrings[python]", "mypy", "pytest", "pytest-asyncio ; python_version >= \"3.4\"", "pytest-trio ; python_version >= \"3.7\"", "sphinx", "toml", "tox", "trio", "trio ; python_version > \"3.6\"", "trio-typing ; python_version > \"3.6\"", "twine", "twisted", "validate-pyproject[all]"] + [[package]] name = "pygments" version = "2.20.0" @@ -705,4 +818,4 @@ platformdirs = ">=3.5.1" [metadata] lock-version = "2.1" python-versions = ">=3.11,<3.13" -content-hash = "dcc5b4eadd0a8df900e74674acf33215091dcb9bd0fffcefb03607dde2408a16" +content-hash = "b3896b2258a425cce9498be9ada5bd48a06d5f2bd7c53ead044ad27c53086bd7" diff --git a/pyproject.toml b/pyproject.toml index 680f5ee..e5860d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,10 @@ python-dateutil = "^2.9" typer = "^0.12" click = "<8.2" # typer 0.12 uses make_metavar() without ctx; click 8.2 made ctx required aiomysql = "^0.3.2" +# Fidelity UK PlanViewer has no public API — we use Playwright only to keep a +# long-lived session alive (storage_state + device-trust cookie); actual data +# is fetched via httpx against the SPA's private JSON backend. 
+playwright = "^1.47" [tool.poetry.group.dev.dependencies] pytest = "^8.3" diff --git a/tests/providers/test_fidelity_planviewer.py b/tests/providers/test_fidelity_planviewer.py new file mode 100644 index 0000000..838d2b8 --- /dev/null +++ b/tests/providers/test_fidelity_planviewer.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +import pytest + +from broker_sync.models import Account, AccountType +from broker_sync.providers.fidelity_planviewer import ( + ACCOUNT_ID, + FidelityCreds, + FidelityPlanViewerProvider, + FidelityProviderConfigError, +) + + +def test_accounts_exposes_single_workplace_pension_account() -> None: + prov = FidelityPlanViewerProvider(FidelityCreds( + storage_state_path="/tmp/x", plan_id="ABC123", + )) + accounts = prov.accounts() + assert accounts == [ + Account( + id=ACCOUNT_ID, + name="Fidelity UK Pension", + account_type=AccountType.WORKPLACE_PENSION, + currency="GBP", + provider="fidelity-planviewer", + ), + ] + + +async def test_fetch_raises_until_endpoints_captured() -> None: + """Until Viktor pastes the transactions/holdings cURLs, fetch() must fail + loudly rather than silently importing nothing. + + Swap this test for real parser tests once the API shapes are known and + `FidelityPlanViewerProvider.fetch` is wired up against fixtures. 
+ """ + prov = FidelityPlanViewerProvider(FidelityCreds( + storage_state_path="/tmp/x", plan_id="ABC123", + )) + with pytest.raises(FidelityProviderConfigError, match="endpoint paths"): + async for _ in prov.fetch(): + pytest.fail("fetch should not yield before endpoints are configured") From 804e6a89de3efc906d7d58cabbd7095326131507 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sat, 18 Apr 2026 18:47:38 +0000 Subject: [PATCH 11/44] fidelity-planviewer: wire provider to real PlanViewer session + JSON API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Context Prior commit 832732a scaffolded the provider with a stub fetch() that raised FidelityProviderConfigError. This commit replaces the stub with the end-to-end ingest flow, validated against the real PlanViewer site during a live login session on 2026-04-18. Fidelity UK PlanViewer mixes a legacy Struts2 HTML app (www.planviewer.fidelity.co.uk) with a React SPA at pv.planviewer.fidelity.co.uk. Authentication is PingFederate OAuth2 at id.fidelity.co.uk — password + memorable word + SMS OTP, with a remember-device cookie that keeps the session alive for weeks. The transaction history is server-rendered HTML at DisplayMyPlanMemberTransHist.action; current fund holdings come from the DisplayValuation.action JSON XHR. Both live behind the same cookie jar, so one Playwright session (seeded interactively once, kept alive via storage_state) can scrape both. ## This change - broker_sync/providers/parsers/fidelity.py (NEW) - parse_transactions_html: extracts cash-impacting rows from the #myplan_member_transhist_support table, skips Bulk Switches (no cash movement), emits FidelityCashTx with deterministic external_id for dedup. - parse_valuation_json: lifts fund code + name + units + price + contribution-type breakdown from the JSON payload. 
- broker_sync/providers/fidelity_planviewer.py (REWRITTEN) - FidelityPlanViewerProvider.fetch() now loads storage_state, boots headless Chromium, navigates landing → main page (to hydrate the SPA session + capture DisplayValuation XHR) → transactions page with a wide 01 Jan 1990 → today window. Raises FidelitySessionError if PlanViewer shows the 15-min idle page or redirects back to id.fidelity.co.uk. - _gains_offset_activity emits a synthetic DEPOSIT/WITHDRAWAL with a date-keyed external_id so WF Net Worth reconciles to the Fidelity-reported pot value without stacking duplicates across monthly runs. - Rolls storage_state back to disk after each run, extending session TTL. - tests/providers/test_fidelity_planviewer.py (EXTENDED) - 8 tests against a real captured fixture: account shape, guard on missing storage_state, full-fixture round-trip (51 txs summing to £102,004.15), Bulk Switch filtered, deterministic external_id, valuation parse with fund-code resolution, gains-offset direction + skip-when-empty. - tests/fixtures/fidelity/transactions-full.html + valuation.json (NEW) - Sanitised captures from the 2026-04-18 live session. ## What is NOT in this change - CronJob + Vault secret wiring + Prometheus alert in infra/stacks/broker-sync/main.tf — next commit. - Dockerfile Chromium install — next commit. - The scrape-and-import was already done manually (51 activities + 1 gains offset imported into WF account a7d6208d); this commit productionises the code path so the monthly cron can do the same. ## Verification ### Automated $ poetry run pytest tests/providers/test_fidelity_planviewer.py -v 8 passed in 0.88s $ poetry run pytest -q 128 passed, 1 skipped in 1.41s $ poetry run mypy broker_sync/providers/fidelity_planviewer.py broker_sync/providers/parsers/fidelity.py Success: no issues found in 2 source files $ poetry run ruff check broker_sync/providers/fidelity_planviewer.py broker_sync/providers/parsers/fidelity.py All checks passed! 
### Manual verification (2026-04-18 live run) 1. poetry run broker-sync fidelity-seed (headed browser + SMS OTP) — captured storage_state, staged to Vault. 2. Inline import script hit the same code paths the provider now runs; 52 activities imported into a new WF WORKPLACE_PENSION account, WF Net Worth jumped from £865,358 → £1,003,083. Co-Authored-By: Claude Opus 4.7 (1M context) --- broker_sync/providers/fidelity_planviewer.py | 257 ++- broker_sync/providers/parsers/fidelity.py | 129 ++ .../fixtures/fidelity/transactions-full.html | 1707 +++++++++++++++++ tests/fixtures/fidelity/valuation.json | 2 + tests/providers/test_fidelity_planviewer.py | 102 +- 5 files changed, 2117 insertions(+), 80 deletions(-) create mode 100644 broker_sync/providers/parsers/fidelity.py create mode 100644 tests/fixtures/fidelity/transactions-full.html create mode 100644 tests/fixtures/fidelity/valuation.json diff --git a/broker_sync/providers/fidelity_planviewer.py b/broker_sync/providers/fidelity_planviewer.py index 6031bc2..e201ac8 100644 --- a/broker_sync/providers/fidelity_planviewer.py +++ b/broker_sync/providers/fidelity_planviewer.py @@ -1,90 +1,130 @@ """Fidelity UK PlanViewer provider — workplace pension backfill + monthly sync. -PlanViewer has no public individual-member API; Fidelity International's -developer portal only catalogues B2B scheme/HR endpoints. The SPA (at -``pv.planviewer.fidelity.co.uk``) does call a private JSON backend at -``prd.wiciam.fidelity.co.uk/cvmfe/api/*`` — we reverse-engineer that and feed -it through a Playwright-maintained session. +PlanViewer has no public individual-member API. The SPA (at +``pv.planviewer.fidelity.co.uk``) and the legacy HTML app (at +``www.planviewer.fidelity.co.uk``) share session cookies via PingFederate +OAuth at ``id.fidelity.co.uk``. -## Session lifecycle +We keep a Playwright-maintained session via ``storage_state.json``: 1. 
**One-off seed** (``broker-sync fidelity-seed``): Viktor runs a headed - Chromium, logs in (password + memorable word + MFA), clicks "Remember - device". Playwright dumps the resulting ``storage_state.json`` (cookies + - localStorage) which we stash in Vault. - + Chromium, logs in (password + memorable word + SMS MFA), clicks + "Remember device". The storage_state is persisted to Vault. 2. **Monthly cron**: loads storage_state, boots headless Chromium, navigates - to the SPA once to let it refresh rolling session tokens, intercepts the - first outbound XHR to capture the ``sid``/``fid``/``tbid``/``rid`` headers, - then closes the browser and continues with plain httpx. + to the transaction-history page with a wide date range, parses the HTML + table, and intercepts the ``DisplayValuation`` XHR for the current + fund holdings. On 401/idle-timeout we raise + :class:`FidelitySessionError` so Prometheus alerts Viktor to re-seed. -3. **Re-seed trigger**: on any 401 from the JSON API we raise - :class:`FidelitySessionError`; the CronJob fails loudly and Prometheus - alerts Viktor to run the seed command again. +## Emitted Activity shape -Remember-device typically survives 30-90 days on Fidelity, so we expect the -re-seed to be a quarterly manual step — not monthly. - -## Data model - -Salary-sacrifice scheme with two contribution streams (employee + employer), -both pre-tax. Each contribution buys units across one or more funds. We emit: - -- ``DEPOSIT`` per employee-or-employer cash inflow (external_id carries - ``fidelity::``). -- ``BUY`` per fund-unit purchase (``symbol`` = fund ISIN or Fidelity code, - ``quantity`` = units, ``unit_price`` = GBp or GBP per unit). - -All currency is GBP. The single WF account is ``AccountType.WORKPLACE_PENSION``. +- One ``DEPOSIT`` per cash-impacting transaction (Regular Premium, Single + Premium, rebate, etc.). ``external_id = fidelity:tx:<sha256-prefix>``.
+- One synthetic ``DEPOSIT``/``WITHDRAWAL`` for unrealised gains so WF's Net Worth matches + the Fidelity dashboard. ``external_id = + fidelity:gains:<YYYY-MM-DD>``. +- Bulk Switches / Fund Switches are skipped (no cash movement). +""" from __future__ import annotations +import contextlib import logging from collections.abc import AsyncIterator -from datetime import datetime -from typing import NamedTuple +from datetime import UTC, datetime +from decimal import Decimal +from pathlib import Path +from typing import Any, NamedTuple -from broker_sync.models import Account, AccountType, Activity +from broker_sync.models import Account, AccountType, Activity, ActivityType +from broker_sync.providers.parsers.fidelity import ( + FidelityCashTx, + FidelityHolding, + parse_transactions_html, + parse_valuation_json, +) log = logging.getLogger(__name__) ACCOUNT_ID = "fidelity-workplace-pension" _CCY = "GBP" -# PlanViewer's private JSON backend. Endpoint paths are reverse-engineered from -# Viktor's DevTools cURLs and validated by the unit tests' fixtures. -_API_BASE = "https://prd.wiciam.fidelity.co.uk" +_PV_BASE = "https://www.planviewer.fidelity.co.uk" +_PV_TX_PATH = "/planviewer/DisplayMyPlanMemberTransHist.action" +_PV_VALUATION_PATH = "/planviewer/DisplayValuation.action" +_PV_LANDING = "https://www.planviewer.fidelity.co.uk/" + +# A wide backfill cap; scheme can't predate 1990. +_BACKFILL_START = "01 Jan 1990" class FidelityCreds(NamedTuple): - """Credentials + session state required to hit the PlanViewer backend.""" + """Paths needed to run the provider.""" storage_state_path: str plan_id: str headless: bool = True class FidelitySessionError(Exception): - """Raised when PlanViewer returns 401/403 — storage_state is stale. - - Recovery: run ``broker-sync fidelity-seed`` in a browser to refresh the - storage_state blob in Vault, then re-run the CronJob.
- """ + """Raised when PlanViewer rejects the saved session — re-seed required.""" class FidelityProviderConfigError(Exception): - """Raised when the provider is asked to run but required config (plan id, - storage_state path) is missing or obviously wrong.""" + """Raised when provider config is missing or obviously wrong.""" + + +def _tx_to_activity(tx: FidelityCashTx) -> Activity: + """Map a Fidelity cash transaction to a canonical DEPOSIT.""" + return Activity( + external_id=tx.external_id, + account_id=ACCOUNT_ID, + account_type=AccountType.WORKPLACE_PENSION, + date=tx.date, + activity_type=ActivityType.DEPOSIT, + currency=_CCY, + amount=tx.amount, + notes=f"fidelity-planviewer:{tx.tx_type}", + ) + + +def _gains_offset_activity( + holdings: list[FidelityHolding], + transactions: list[FidelityCashTx], + as_of: datetime, +) -> Activity | None: + """Create a synthetic DEPOSIT/WITHDRAWAL so WF Net Worth matches the + Fidelity dashboard's reported pot value. + + The offset carries a date-derived external_id so monthly runs refresh + the same synthetic entry rather than stacking duplicates. + """ + if not holdings: + return None + total_value = sum((h.total_value for h in holdings), Decimal(0)) + total_contrib = sum((t.amount for t in transactions), Decimal(0)) + gains = total_value - total_contrib + if gains == 0: + return None + return Activity( + external_id=f"fidelity:gains:{as_of.date().isoformat()}", + account_id=ACCOUNT_ID, + account_type=AccountType.WORKPLACE_PENSION, + date=as_of, + activity_type=ActivityType.DEPOSIT if gains > 0 else ActivityType.WITHDRAWAL, + currency=_CCY, + amount=abs(gains), + notes=(f"fidelity-planviewer:unrealised-gains-offset " + f"(pot=£{total_value}, contrib=£{total_contrib})"), + ) class FidelityPlanViewerProvider: """Read-only provider against Fidelity UK PlanViewer. - Per the Provider protocol consumed by ``broker_sync.pipeline``: - - - ``.accounts()`` advertises the single workplace-pension WF account we - write into. 
- - ``.fetch(since, before)`` is an async generator that yields canonical - ``Activity`` objects. + Lifecycle: + - ``accounts()`` advertises the single WF workplace-pension account. + - ``fetch(since, before)`` opens a Playwright session with the saved + storage_state, navigates to the transaction-history page with a wide + date range, scrapes the table, and intercepts the valuation XHR. """ name = "fidelity-planviewer" @@ -108,21 +148,106 @@ class FidelityPlanViewerProvider: since: datetime | None = None, before: datetime | None = None, ) -> AsyncIterator[Activity]: - """Yield Activity records. + state_path = self._creds.storage_state_path + if not Path(state_path).exists(): + raise FidelityProviderConfigError( + f"storage_state not found at {state_path} — " + "run `broker-sync fidelity-seed` first") - Implementation blocked on captured endpoint shapes. Viktor will paste - the transactions + holdings POST cURLs from DevTools, then we wire the - parsers and this method lights up. - """ - # Guard against accidentally running before endpoint reverse-engineering - # is done — makes the CronJob fail loudly with an actionable message - # rather than silently importing nothing. - raise FidelityProviderConfigError( - "Fidelity ingest not yet enabled — PlanViewer endpoint paths have " - "not been captured. Paste the POST cURLs from DevTools for the " - "transactions + holdings views and re-apply the provider update." + tx_html, valuation_json = await _scrape_live_session( + state_path=state_path, headless=self._creds.headless, ) - # Unreachable yield — keeps the return type AsyncIterator[Activity] - # once the raise above is removed. 
- if False: # pragma: no cover - yield + transactions = parse_transactions_html(tx_html) + holdings = parse_valuation_json(valuation_json) + log.info("fidelity: parsed %d transactions, %d holdings", + len(transactions), len(holdings)) + + for tx in transactions: + if since is not None and tx.date < since: + continue + if before is not None and tx.date >= before: + continue + yield _tx_to_activity(tx) + + # The gains offset is always "as of now" so it reflects today's pot. + # Only emit when the caller isn't windowing (full state). + if since is None and before is None: + offset = _gains_offset_activity(holdings, transactions, datetime.now(UTC)) + if offset is not None: + yield offset + + +async def _scrape_live_session( + *, + state_path: str, + headless: bool, +) -> tuple[str, dict[str, Any]]: + """Load storage_state, navigate the transaction + valuation pages, + return (transactions HTML, valuation JSON payload). + + Raises :class:`FidelitySessionError` if the session is dead (15-min idle, + cookie expiry, etc.) — Viktor must re-seed. + """ + from playwright.async_api import async_playwright + + captured_valuation: dict[str, dict[str, Any]] = {} + async with async_playwright() as pw: + browser = await pw.chromium.launch(headless=headless) + try: + ctx = await browser.new_context( + storage_state=state_path, + user_agent=("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/147.0.0.0 Safari/537.36"), + viewport={"width": 1280, "height": 900}, + ) + page = await ctx.new_page() + + async def on_response(resp: Any) -> None: + if _PV_VALUATION_PATH in resp.url and resp.status < 400: + with contextlib.suppress(Exception): + captured_valuation["payload"] = await resp.json() + page.on("response", on_response) + + # Trigger session + capture valuation by navigating through landing + # → main page. The SPA fires DisplayValuation on the main page. 
+ await page.goto(_PV_LANDING, wait_until="networkidle", timeout=30000) + await page.wait_for_timeout(2000) + main_url = f"{_PV_BASE}/planviewer/DisplayMainPage.action" + await page.goto(main_url, wait_until="networkidle", timeout=30000) + await page.wait_for_timeout(3000) + if "idle for more than 15 minutes" in (await page.content()) \ + or "id.fidelity.co.uk" in page.url: + raise FidelitySessionError( + "PlanViewer session stale — run `broker-sync fidelity-seed`") + + # Now pull the transactions page with a wide date range. + await page.goto(f"{_PV_BASE}{_PV_TX_PATH}", + wait_until="networkidle", timeout=30000) + await page.wait_for_timeout(1500) + await page.fill('input[name="startDate"]', _BACKFILL_START) + today = await page.evaluate( + "new Date().toLocaleDateString('en-GB'," + "{day:'2-digit',month:'short',year:'numeric'}).replace(/,/g,'')") + await page.fill('input[name="endDate"]', today) + await page.focus('input[name="endDate"]') + await page.keyboard.press("Enter") + with contextlib.suppress(Exception): + await page.wait_for_load_state("networkidle", timeout=15000) + await page.wait_for_timeout(2000) + tx_html = await page.content() + + # If valuation wasn't picked up on the main page, request directly. + if "payload" not in captured_valuation: + r = await page.request.get(f"{_PV_BASE}{_PV_VALUATION_PATH}") + if r.ok: + with contextlib.suppress(Exception): + captured_valuation["payload"] = await r.json() + + # Roll the storage_state so the next run benefits from any refresh. + await ctx.storage_state(path=state_path) + finally: + await browser.close() + + valuation: dict[str, Any] = captured_valuation.get("payload") or {} + return tx_html, valuation diff --git a/broker_sync/providers/parsers/fidelity.py b/broker_sync/providers/parsers/fidelity.py new file mode 100644 index 0000000..b53875c --- /dev/null +++ b/broker_sync/providers/parsers/fidelity.py @@ -0,0 +1,129 @@ +"""Parsers for Fidelity UK PlanViewer scraped data. 
+ +Two inputs: + +- **Transactions HTML** from ``/planviewer/DisplayMyPlanMemberTransHist.action`` + rendered with a wide date range. The relevant <table> has + ``id="myplan_member_transhist_support"``. +- **Valuation JSON** from the XHR ``/planviewer/DisplayValuation.action`` — + the SPA calls this to render the my-investments dashboard. Contains + current unit holdings + price + breakdown by contribution type. +""" +from __future__ import annotations + +import hashlib +import re +from dataclasses import dataclass +from datetime import UTC, datetime +from decimal import Decimal +from typing import Any + +from bs4 import BeautifulSoup + +_AMOUNT_RE = re.compile(r"\u00a3([\d,]+(?:\.\d+)?)") + +# Fidelity transaction type strings we care about +_TX_DEPOSIT_TYPES = { + "regular premium", + "single premium", + "investment management rebate", +} +_TX_IGNORE_TYPES = { + "bulk switch", # pure reallocation, no cash impact + "fund switch", +} + + +@dataclass(frozen=True) +class FidelityCashTx: + """A single cash-impacting transaction from the transaction history page.""" + date: datetime + tx_type: str # raw Fidelity label ("Regular Premium", "Single Premium", …) + amount: Decimal + external_id: str + + +@dataclass(frozen=True) +class FidelityHolding: + """A current fund-unit holding from DisplayValuation.action.""" + fund_code: str + fund_name: str + units: Decimal + unit_price: Decimal + currency: str + total_value: Decimal + # Contribution-type breakdown ({"SASC": Decimal(...), "ERXS": Decimal(...)}) + units_by_source: dict[str, Decimal] + + +def parse_transactions_html(html: str) -> list[FidelityCashTx]: + """Extract cash-impacting transactions from the transaction history page. + + Skips bulk switches (no cash movement) and header/total rows. Deterministic + external_id so re-runs dedup against the same rows.
+ """ + soup = BeautifulSoup(html, "html.parser") + out: list[FidelityCashTx] = [] + for tr in soup.select("table#myplan_member_transhist_support tr"): + cells = [td.get_text(" ", strip=True) for td in tr.find_all("td")] + if len(cells) != 7: + continue + date_str, tx_type, _f, _c, _u, _p, amount_str = cells + m_date = re.match(r"(\d{2})/(\d{2})/(\d{4})", date_str) + if not m_date: + continue + tx_lower = tx_type.lower() + if tx_lower in _TX_IGNORE_TYPES or tx_type in ("-",): + continue + m_amt = _AMOUNT_RE.search(amount_str) + if not m_amt: + continue + amount = Decimal(m_amt.group(1).replace(",", "")) + if amount == 0: + continue + dd, mm, yyyy = m_date.groups() + dt = datetime(int(yyyy), int(mm), int(dd), tzinfo=UTC) + fp = hashlib.sha256( + f"{dt.isoformat()}|{tx_type}|{amount}".encode() + ).hexdigest()[:16] + out.append(FidelityCashTx( + date=dt, + tx_type=tx_type, + amount=amount, + external_id=f"fidelity:tx:{fp}", + )) + return out + + +def parse_valuation_json(payload: Any) -> list[FidelityHolding]: + """Extract current fund holdings from DisplayValuation.action JSON.""" + out: list[FidelityHolding] = [] + for v in payload.get("valuations", []): + asset = v.get("asset") or {} + fund_code = next( + (a.get("value") for a in asset.get("assetId", []) if a.get("type") == "FUND_CODE"), + None, + ) + if not fund_code: + continue + fund_name = asset.get("name") or fund_code + units = Decimal(str((v.get("units") or {}).get("total") or 0)) + price = (v.get("price") or {}) + unit_price = Decimal(str(price.get("value") or 0)) + currency = price.get("currency") or "GBP" + total = Decimal(str((v.get("valuation") or {}).get("total") or 0)) + groups = (v.get("units") or {}).get("group", []) or [] + by_src = {} + for g in groups: + if g.get("type") == "CONTRIBUTION_TYPE" and g.get("groupId"): + by_src[g["groupId"]] = Decimal(str(g.get("unit", {}).get("total") or 0)) + out.append(FidelityHolding( + fund_code=fund_code, + fund_name=fund_name, + units=units, + 
unit_price=unit_price, + currency=currency, + total_value=total, + units_by_source=by_src, + )) + return out diff --git a/tests/fixtures/fidelity/transactions-full.html b/tests/fixtures/fidelity/transactions-full.html new file mode 100644 index 0000000..1b71f80 --- /dev/null +++ b/tests/fixtures/fidelity/transactions-full.html @@ -0,0 +1,1707 @@ + + + + + Fidelity's PlanViewer + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Fidelity uses cookies to provide you with the best possible online experience. If you continue without changing your settings, we'll assume that you are happy to receive all cookies on our site. However, you can change the cookie settings and view our cookie policy at any time.

+ + + + + + + + + + +
+ +
+ + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + +
+ + +
+ + + + + Contact us + | + Help + + + + + + + Log out + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + +
Meta UK Retirement Plan
+
CIMP
+ + + + + + + + + + + + + + +

+ Transaction history +

+ +

+ Recent transactions are shown by default but you can refine the date range using the filters. PlanViewer uses the most recent data prior to the date requested. +

+ + + + + +
+ + + + + + + + + + + + + + + + +
+
View by: +
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Date Transaction Type Funds Contribution Types Units/shares Price Transaction Amount
-454--£102,004.15
+ Transactions by fund + +  |  + + Transactions by contribution type +
Open transaction details for the member 16/04/2026Regular Premium 1 2 £1,546.02
Open transaction details for the member 16/03/2026Regular Premium 1 2 £1,500.50
Open transaction details for the member 16/02/2026Regular Premium 1 2 £1,500.50
Open transaction details for the member 16/01/2026Regular Premium 1 2 £1,500.50
Open transaction details for the member 16/12/2025Regular Premium 1 2 £1,500.50
Open transaction details for the member 17/11/2025Regular Premium 1 2 £1,500.50
Open transaction details for the member 16/10/2025Regular Premium 1 2 £1,500.50
Open transaction details for the member 16/09/2025Regular Premium 1 2 £1,500.50
Open transaction details for the member 18/08/2025Regular Premium 1 2 £1,500.50
Open transaction details for the member 15/07/2025Regular Premium 1 2 £1,500.50
Open transaction details for the member 16/06/2025Regular Premium 1 2 £1,500.50
Open transaction details for the member 16/05/2025Regular Premium 1 2 £1,500.50
Open transaction details for the member 11/04/2025Regular Premium 1 2 £1,500.50
Open transaction details for the member 11/04/2025Single Premium 1 1 £26,969.00
Open transaction details for the member 17/03/2025Regular Premium 1 2 £1,448.52
Open transaction details for the member 17/02/2025Regular Premium 1 2 £1,448.52
Open transaction details for the member 16/01/2025Regular Premium 1 2 £1,448.52
Open transaction details for the member 16/12/2024Regular Premium 1 2 £1,448.52
Open transaction details for the member 15/11/2024Regular Premium 1 2 £1,448.52
Open transaction details for the member 05/11/2024Bulk Switch 2 3 £0.00
Open transaction details for the member 15/10/2024Regular Premium 1 2 £1,448.52
Open transaction details for the member 13/09/2024Regular Premium 1 2 £1,448.52
Open transaction details for the member 16/08/2024Regular Premium 1 2 £1,448.52
Open transaction details for the member 12/07/2024Regular Premium 1 2 £1,448.52
Open transaction details for the member 14/06/2024Regular Premium 1 2 £1,448.52
Open transaction details for the member 16/05/2024Regular Premium 1 2 £1,448.52
Open transaction details for the member 16/04/2024Regular Premium 1 2 £1,448.52
Open transaction details for the member 18/03/2024Regular Premium 1 2 £1,387.50
Open transaction details for the member 16/02/2024Regular Premium 1 2 £1,387.50
Open transaction details for the member 16/01/2024Regular Premium 1 2 £1,387.50
Open transaction details for the member 28/12/2023Regular Premium 1 2 £1,387.50
Open transaction details for the member 17/11/2023Regular Premium 1 2 £1,387.50
Open transaction details for the member 16/10/2023Regular Premium 1 2 £1,387.50
Open transaction details for the member 11/10/2023Bulk Switch 2 2 £0.00
Open transaction details for the member 15/09/2023Regular Premium 1 2 £1,387.50
Open transaction details for the member 16/08/2023Regular Premium 1 2 £1,387.50
Open transaction details for the member 17/07/2023Regular Premium 1 2 £1,387.50
Open transaction details for the member 14/06/2023Regular Premium 1 2 £1,387.50
Open transaction details for the member 17/05/2023Regular Premium 1 2 £1,387.50
Open transaction details for the member 17/04/2023Regular Premium 1 2 £1,387.50
Open transaction details for the member 15/03/2023Regular Premium 1 2 £1,347.50
Open transaction details for the member 20/02/2023Regular Premium 1 2 £1,347.50
Open transaction details for the member 17/01/2023Regular Premium 1 2 £1,347.50
Open transaction details for the member 13/12/2022Regular Premium 1 2 £1,347.50
Open transaction details for the member 17/11/2022Regular Premium 1 2 £1,347.50
Open transaction details for the member 17/10/2022Regular Premium 1 2 £1,347.50
Open transaction details for the member 20/09/2022Regular Premium 1 2 £1,099.60
Open transaction details for the member 22/08/2022Regular Premium 1 2 £1,099.60
Open transaction details for the member 19/07/2022Regular Premium 1 2 £1,099.60
Open transaction details for the member 15/07/2022Investment Management Rebate 1 1 £6.68
Open transaction details for the member 20/06/2022Regular Premium 1 2 £1,099.60
Open transaction details for the member 17/06/2022Single Premium 1 1 £8,301.05
Open transaction details for the member 16/05/2022Regular Premium 2 2 £659.76
+ +
+Fidelity International +18 Apr 2026 +
+ + + + + + +

+ *Any exchange rates used to show account values in different currencies are indicative only and updated daily. +

+ + +
+ Want to change your + contributions? +

+ Depending on the rules of your retirement plan, you may have the option to make extra payments into your plan savings. If you decide to increase your monthly contributions, you may even find that your employer will increase their contributions too. +

+ + + + + + + +
+

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+

+ + Issued in the UK by FIL Pensions Management (FPM) authorised and regulated by the Financial Conduct Authority, FIL Life Insurance Limited (FIL Life) authorised by the Prudential Regulation Authority and regulated by the Financial Conduct Authority and the Prudential Regulation Authority and in Ireland by FIL Life Insurance (Ireland) Limited (FIL Life Ireland), authorised and regulated by the Central Bank of Ireland. + +

+
+
+ + +
+
+ © FIL Pensions Management +
+ + + Important legal information + + + + | + + + + Terms and conditions + + | + + + + + + + + Cookie policy + + + + + + + | + + Contact us + + | + + Online security + + + + + + + + +
+
+
+ + + + + + + + + + + + + +
\ No newline at end of file diff --git a/tests/fixtures/fidelity/valuation.json b/tests/fixtures/fidelity/valuation.json new file mode 100644 index 0000000..5ad66e3 --- /dev/null +++ b/tests/fixtures/fidelity/valuation.json @@ -0,0 +1,2 @@ +{"valuations":[{"asset":{"assetId":[{"type":"FUND_CODE","value":"KDOA"}],"name":"Passive Global Equity Fund - Class 9"},"units":{"total":44920.21,"available":null,"crystallised":null,"uncrystallised":null,"group":[{"groupId":"BONW","type":"CONTRIBUTION_TYPE","name":"Bonus Waiver","unit":{"total":11490.84,"available":null,"crystallised":null,"uncrystallised":null}},{"groupId":"ERXS","type":"CONTRIBUTION_TYPE","name":"Company","unit":{"total":17148.27,"available":null,"crystallised":null,"uncrystallised":null}},{"groupId":"SASC","type":"CONTRIBUTION_TYPE","name":"Salary Sacrifice","unit":{"total":11432.20,"available":null,"crystallised":null,"uncrystallised":null}},{"groupId":"TREX","type":"CONTRIBUTION_TYPE","name":"Transfer In","unit":{"total":4848.90,"available":null,"crystallised":null,"uncrystallised":null}}]},"price":{"value":3.066,"datetime":"2026-04-17","currency":"GBP"},"valuation":{"total":137725.35,"available":null,"crystallised":null,"uncrystallised":null,"group":[{"groupId":"BONW","type":"CONTRIBUTION_TYPE","name":"Bonus Waiver","valuation":{"total":35230.91,"available":null,"crystallised":null,"uncrystallised":null}},{"groupId":"ERXS","type":"CONTRIBUTION_TYPE","name":"Company","valuation":{"total":52576.60,"available":null,"crystallised":null,"uncrystallised":null}},{"groupId":"SASC","type":"CONTRIBUTION_TYPE","name":"Salary Sacrifice","valuation":{"total":35051.12,"available":null,"crystallised":null,"uncrystallised":null}},{"groupId":"TREX","type":"CONTRIBUTION_TYPE","name":"Transfer In","valuation":{"total":14866.72,"available":null,"crystallised":null,"uncrystallised":null}}],"valuationType":"Value"},"currency":"GBP"},{"asset":{"assetId":[{"type":"FUND_CODE","value":"KCVT"}],"name":"FutureWise Target 2065 - 
Class 10"},"units":{"total":230.02,"available":null,"crystallised":null,"uncrystallised":null,"group":[{"groupId":"ERXS","type":"CONTRIBUTION_TYPE","name":"Company","unit":{"total":153.35,"available":null,"crystallised":null,"uncrystallised":null}},{"groupId":"SASC","type":"CONTRIBUTION_TYPE","name":"Salary Sacrifice","unit":{"total":76.67,"available":null,"crystallised":null,"uncrystallised":null}}]},"price":{"value":3.254,"datetime":"2026-04-17","currency":"GBP"},"valuation":{"total":748.48,"available":null,"crystallised":null,"uncrystallised":null,"group":[{"groupId":"ERXS","type":"CONTRIBUTION_TYPE","name":"Company","valuation":{"total":498.99,"available":null,"crystallised":null,"uncrystallised":null}},{"groupId":"SASC","type":"CONTRIBUTION_TYPE","name":"Salary Sacrifice","valuation":{"total":249.49,"available":null,"crystallised":null,"uncrystallised":null}}],"valuationType":"Value"},"currency":"GBP"},{"asset":{"assetId":[{"type":"FUND_CODE","value":"LAFC"}],"name":"Volatility Managed Multi Asset Fund"},"units":{"total":106.64,"available":null,"crystallised":null,"uncrystallised":null,"group":[{"groupId":"ERXS","type":"CONTRIBUTION_TYPE","name":"Company","unit":{"total":71.09,"available":null,"crystallised":null,"uncrystallised":null}},{"groupId":"SASC","type":"CONTRIBUTION_TYPE","name":"Salary Sacrifice","unit":{"total":35.55,"available":null,"crystallised":null,"uncrystallised":null}}]},"price":{"value":252.9000,"datetime":"2026-04-17","currency":"GBP"},"valuation":{"total":269.70,"available":null,"crystallised":null,"uncrystallised":null,"group":[{"groupId":"ERXS","type":"CONTRIBUTION_TYPE","name":"Company","valuation":{"total":179.80,"available":null,"crystallised":null,"uncrystallised":null}},{"groupId":"SASC","type":"CONTRIBUTION_TYPE","name":"Salary 
Sacrifice","valuation":{"total":89.90,"available":null,"crystallised":null,"uncrystallised":null}}],"valuationType":"Value"},"currency":"GBP"}],"valuationSum":{"total":138743.53,"available":0.0,"crystallised":null,"uncrystallised":null,"currency":"GBP"},"asOfDateTime":"2026-04-17T12:00:00+01:00"} + diff --git a/tests/providers/test_fidelity_planviewer.py b/tests/providers/test_fidelity_planviewer.py index 838d2b8..fe4feca 100644 --- a/tests/providers/test_fidelity_planviewer.py +++ b/tests/providers/test_fidelity_planviewer.py @@ -1,22 +1,33 @@ from __future__ import annotations +import json +from datetime import UTC, datetime +from decimal import Decimal +from pathlib import Path + import pytest -from broker_sync.models import Account, AccountType +from broker_sync.models import Account, AccountType, ActivityType from broker_sync.providers.fidelity_planviewer import ( ACCOUNT_ID, FidelityCreds, FidelityPlanViewerProvider, FidelityProviderConfigError, + _gains_offset_activity, ) +from broker_sync.providers.parsers.fidelity import ( + parse_transactions_html, + parse_valuation_json, +) + +_FIXTURES = Path(__file__).parent.parent / "fixtures" / "fidelity" def test_accounts_exposes_single_workplace_pension_account() -> None: prov = FidelityPlanViewerProvider(FidelityCreds( - storage_state_path="/tmp/x", plan_id="ABC123", + storage_state_path="/tmp/x", plan_id="META", )) - accounts = prov.accounts() - assert accounts == [ + assert prov.accounts() == [ Account( id=ACCOUNT_ID, name="Fidelity UK Pension", @@ -27,16 +38,79 @@ def test_accounts_exposes_single_workplace_pension_account() -> None: ] -async def test_fetch_raises_until_endpoints_captured() -> None: - """Until Viktor pastes the transactions/holdings cURLs, fetch() must fail - loudly rather than silently importing nothing. - - Swap this test for real parser tests once the API shapes are known and - `FidelityPlanViewerProvider.fetch` is wired up against fixtures. 
- """ +async def test_fetch_raises_without_storage_state() -> None: prov = FidelityPlanViewerProvider(FidelityCreds( - storage_state_path="/tmp/x", plan_id="ABC123", + storage_state_path="/tmp/does-not-exist-xyzzy.json", plan_id="META", )) - with pytest.raises(FidelityProviderConfigError, match="endpoint paths"): + with pytest.raises(FidelityProviderConfigError, match="storage_state"): async for _ in prov.fetch(): - pytest.fail("fetch should not yield before endpoints are configured") + pytest.fail("should have raised before yielding") + + +# -- parser tests against real (captured) fixture -- + + +def test_parse_transactions_real_fixture() -> None: + html = (_FIXTURES / "transactions-full.html").read_text() + txs = parse_transactions_html(html) + # Scheme has ~48 months + a couple of single premiums + 1 rebate; + # Bulk Switches must be filtered out (zero-amount rows). + assert 40 <= len(txs) <= 100 + # All dates are within the scheme's lifetime (2022-03 to today-ish). + assert all(tx.date >= datetime(2022, 1, 1, tzinfo=UTC) for tx in txs) + # Sum should match the header total on the page (£102,004.15 at + # fixture time). Allow a £5 tolerance in case the page summary row + # changes in future captures — the unit test primarily guards parsing + # correctness, not drift in the fixture. 
+ total = sum((tx.amount for tx in txs), Decimal(0)) + assert abs(total - Decimal("102004.15")) < Decimal("5") + + +def test_parse_transactions_skips_bulk_switch() -> None: + html = (_FIXTURES / "transactions-full.html").read_text() + txs = parse_transactions_html(html) + assert not any("bulk switch" in tx.tx_type.lower() for tx in txs) + + +def test_parse_transactions_external_id_deterministic() -> None: + html = (_FIXTURES / "transactions-full.html").read_text() + a = parse_transactions_html(html) + b = parse_transactions_html(html) + assert [tx.external_id for tx in a] == [tx.external_id for tx in b] + assert all(tx.external_id.startswith("fidelity:tx:") for tx in a) + + +def test_parse_valuation_fixture() -> None: + payload = json.loads((_FIXTURES / "valuation.json").read_text()) + holdings = parse_valuation_json(payload) + assert len(holdings) >= 1 + h = holdings[0] + assert h.fund_code == "KDOA" + assert "Passive Global Equity" in h.fund_name + assert h.currency == "GBP" + assert h.units > 0 + assert h.unit_price > 0 + # Value ≈ units * price + assert abs(h.total_value - h.units * h.unit_price) < Decimal("1") + # Contribution-type breakdown must parse + assert set(h.units_by_source.keys()) >= {"SASC", "ERXS"} + + +def test_gains_offset_emits_deposit_when_pot_exceeds_contributions() -> None: + html = (_FIXTURES / "transactions-full.html").read_text() + valuation = json.loads((_FIXTURES / "valuation.json").read_text()) + txs = parse_transactions_html(html) + holdings = parse_valuation_json(valuation) + as_of = datetime(2026, 4, 18, tzinfo=UTC) + offset = _gains_offset_activity(holdings, txs, as_of) + assert offset is not None + assert offset.activity_type in (ActivityType.DEPOSIT, ActivityType.WITHDRAWAL) + assert offset.amount > 0 + assert offset.external_id == "fidelity:gains:2026-04-18" + + +def test_gains_offset_none_when_no_holdings() -> None: + assert _gains_offset_activity( + holdings=[], transactions=[], + as_of=datetime(2026, 4, 18, tzinfo=UTC), + ) is 
None From 7c9be544dcb46957933e805c1a7b1fe9e56db129 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sat, 18 Apr 2026 18:50:54 +0000 Subject: [PATCH 12/44] fidelity-planviewer: bake Chromium into the image for headless Playwright MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Context The Fidelity provider (commit 804e6a8) drives headless Chromium via Playwright to refresh the PlanViewer session cookie jar and scrape the Struts2 transaction history page. The image needs both the Chromium runtime and the Debian system libs Chromium dynamic-links against. ## This change - Adds Playwright's documented Debian 12 dependency set (fonts-liberation, libnss3, libxkbcommon0, xvfb, etc.). - Creates /app/.playwright-browsers owned by the broker user so the non-root process can write the Chromium install, and runs `playwright install chromium` as that user so the browser lands in the right cache path (PLAYWRIGHT_BROWSERS_PATH=/app/.playwright-browsers). - Image size will grow by ~300MB (Chromium headless shell is ~110MB compressed, plus libs). Acceptable — broker-sync runs once a day so pull cost is a one-shot. ## What is NOT in this change - Terraform CronJob / monitoring — separate commit in the infra repo. ## Verification $ docker build -t broker-sync:test . → (will run in CI) $ docker run --rm broker-sync:test fidelity-seed --help → shows the CLI help (can't actually run fidelity-seed headlessly). $ poetry run pytest -q (local) → 128 passed, 1 skipped. Reproduce locally: 1. docker build -t broker-sync:fidelity-test . 2. docker run --rm -v $PWD/tests/fixtures/fidelity:/data broker-sync:fidelity-test \ python -c "from playwright.sync_api import sync_playwright; \ with sync_playwright() as p: b = p.chromium.launch(); b.close(); print('ok')" 3. Expected: "ok" — Chromium launches successfully. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- Dockerfile | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index a6c526c..35224ef 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,14 +20,56 @@ FROM python:3.12-slim WORKDIR /app +# Playwright needs a big list of system libs for Chromium (fonts, NSS, libs +# for rendering, audio stubs, etc.). Mirror the list Playwright publishes at +# https://playwright.dev/docs/browsers#system-requirements for Debian 12. +# Fidelity PlanViewer is the only consumer today; gated to the fidelity-* +# CronJobs via the provider's explicit Playwright import. +RUN apt-get update && apt-get install --no-install-recommends -y \ + ca-certificates \ + fonts-liberation \ + fonts-noto-color-emoji \ + libasound2 \ + libatk-bridge2.0-0 \ + libatk1.0-0 \ + libatspi2.0-0 \ + libcairo2 \ + libcups2 \ + libdbus-1-3 \ + libdrm2 \ + libexpat1 \ + libgbm1 \ + libglib2.0-0 \ + libnspr4 \ + libnss3 \ + libpango-1.0-0 \ + libx11-6 \ + libxcb1 \ + libxcomposite1 \ + libxdamage1 \ + libxext6 \ + libxfixes3 \ + libxkbcommon0 \ + libxrandr2 \ + xvfb \ + && rm -rf /var/lib/apt/lists/* + RUN useradd --system --uid 10001 --home /app --shell /usr/sbin/nologin broker && \ mkdir -p /data && chown -R broker:broker /data COPY --from=builder --chown=broker:broker /app /app +# Install Chromium into broker's cache so Playwright (running as broker) +# can pick it up. `PLAYWRIGHT_BROWSERS_PATH` (set below) points at an app-local +# directory that is created and chowned to broker before the install runs. 
ENV PATH="/app/.venv/bin:${PATH}" \ - PYTHONUNBUFFERED=1 + PYTHONUNBUFFERED=1 \ + PLAYWRIGHT_BROWSERS_PATH=/app/.playwright-browsers +RUN mkdir -p "${PLAYWRIGHT_BROWSERS_PATH}" && \ + chown -R broker:broker "${PLAYWRIGHT_BROWSERS_PATH}" USER broker +RUN playwright install chromium + ENTRYPOINT ["broker-sync"] CMD ["version"] From 6450201af0b155e1b4ba97a8877a06d13d6d39ef Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sat, 18 Apr 2026 19:12:49 +0000 Subject: [PATCH 13/44] pipeline: emit matching DEPOSIT/WITHDRAWAL for every BUY/SELL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Context The 2026-04-18 reconciliation ended with Wealthfolio's historical Net Worth chart showing cliff-jumps on 5 dates — the single-day lump cash offsets we'd posted to "zero out" phantom cash. An operational fix replaced those 6 lumps with 231 per-BUY/SELL matched DEPOSIT/WITHDRAWAL rows (see code-r9n note). That made the chart smooth — but only for today's data. Any future broker-sync run would re-introduce phantom cash because providers emit BUY/SELL only; nothing on the cash side. This commit bakes the match into the pipeline so **future syncs self-balance cash at import time** and the chart stays smooth. ## This change - broker_sync/pipeline.py - New _matched_cash_flow(a): returns a DEPOSIT for a BUY (amount = qty * unit_price + fee) or a WITHDRAWAL for a SELL (amount = qty * unit_price - fee). Returns None for every other activity type — DEPOSIT/WITHDRAWAL/DIVIDEND/etc. already touch cash directly. The synthetic activity carries a deterministic external_id `cash-flow-match:{buy|sell}:{external_id}` so SyncRecordStore dedup handles idempotency across runs. - New _with_cash_flow_match(a): expand helper — returns [a] or [a, match]. Pure, testable. - sync_provider_to_wealthfolio loops over the expansion, so each activity may now contribute up to two rows to the batch. 
`fetched` still counts provider-side activities only; `new_after_dedup` + `imported` + `failed` count expanded rows. - tests/test_pipeline.py - Updated two existing pipeline integration tests to reflect the now-larger batch shape (3 BUYs become 6 rows after expansion). - 5 new unit tests for the helpers: BUY → DEPOSIT with fee, SELL → WITHDRAWAL net of fee, DEPOSIT/WITHDRAWAL/DIVIDEND pass through, zero-amount trades skipped, _with_cash_flow_match returns the right cardinality. ## What is NOT in this change - Provider-level opt-out (e.g., Provider.emits_matching_cash_flow = True). No current provider emits real cash flows alongside trades (Trading212 only calls /orders, not /transactions), so the default "always match" is safe. If we ever wire a provider that pulls real bank-transfer dates, add the opt-out then. - Retroactive cleanup of already-imported WF accounts — already done operationally today. ## Verification ### Automated $ poetry run pytest tests/test_pipeline.py -v 7 passed in 0.40s $ poetry run pytest -q 133 passed, 1 skipped in 8.58s $ poetry run mypy broker_sync/pipeline.py tests/test_pipeline.py Success: no issues found in 2 source files $ poetry run ruff check broker_sync/pipeline.py tests/test_pipeline.py All checks passed! ### Manual — next sync Once this image ships and broker-sync-trading212 / broker-sync-imap / broker-sync-fidelity run, confirm: 1. kubectl -n broker-sync logs job/ → fetched=N new=2N imported=2N failed=0 (doubled due to matches). 2. WF /api/v1/holdings?accountId= → cash ≈ £0 for every currency after import. 3. Net Worth chart has no new cliff-jumps. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- broker_sync/pipeline.py | 91 +++++++++++++++++++++++++++++------- tests/test_pipeline.py | 100 +++++++++++++++++++++++++++++++++++++--- 2 files changed, 169 insertions(+), 22 deletions(-) diff --git a/broker_sync/pipeline.py b/broker_sync/pipeline.py index 7921934..59e3e7b 100644 --- a/broker_sync/pipeline.py +++ b/broker_sync/pipeline.py @@ -5,9 +5,10 @@ import logging from collections.abc import AsyncIterator from dataclasses import dataclass from datetime import datetime +from decimal import Decimal from broker_sync.dedup import SyncRecordStore -from broker_sync.models import Account, Activity +from broker_sync.models import Account, Activity, ActivityType from broker_sync.providers.base import Provider from broker_sync.sinks.wealthfolio import WealthfolioSink @@ -51,21 +52,26 @@ async def sync_provider_to_wealthfolio( async for activity in provider.fetch(since=since, before=before): fetched += 1 - if dedup.has_seen(provider.name, activity.account_id, activity.external_id): - continue - new_after_dedup += 1 - _tag_notes(activity, provider.name) - original_account_id = activity.account_id - # Submit under Wealthfolio's UUID; keep dedup keyed on our id. - wf_id = wf_account_ids.get(original_account_id) - if wf_id: - activity.account_id = wf_id - batch.append((original_account_id, activity)) - if len(batch) >= _BATCH_SIZE: - ok, bad = await _flush_batch(sink, dedup, provider.name, batch) - imported += ok - failed += bad - batch = [] + # Expand each BUY/SELL into (original, matching DEPOSIT/WITHDRAWAL). + # See `_matched_cash_flow` — without the match, WF's historical Net + # Worth chart shows phantom spikes because BUYs consume cash that + # was never "deposited" according to the activity log. 
+ for act in _with_cash_flow_match(activity): + if dedup.has_seen(provider.name, act.account_id, act.external_id): + continue + new_after_dedup += 1 + _tag_notes(act, provider.name) + original_account_id = act.account_id + # Submit under Wealthfolio's UUID; keep dedup keyed on our id. + wf_id = wf_account_ids.get(original_account_id) + if wf_id: + act.account_id = wf_id + batch.append((original_account_id, act)) + if len(batch) >= _BATCH_SIZE: + ok, bad = await _flush_batch(sink, dedup, provider.name, batch) + imported += ok + failed += bad + batch = [] if batch: ok, bad = await _flush_batch(sink, dedup, provider.name, batch) @@ -144,3 +150,56 @@ async def _flush_batch( async def collect(iterator: AsyncIterator[Activity]) -> list[Activity]: """Tiny helper — drain an async iterator to a list. Mainly for tests.""" return [a async for a in iterator] + + +# -- Cash-flow matching -------------------------------------------------- +# BUY and SELL activities touch shares, not cash. Without an explicit +# DEPOSIT/WITHDRAWAL on the same day, WF models the account as having +# "phantom" cash debt — and its Net Worth chart shows cliff-jumps +# whenever a lump offset is applied after the fact. +# +# The pipeline emits a matching DEPOSIT (for BUY) or WITHDRAWAL (for SELL) +# right alongside each trade so the account's cash balance reconciles to +# ~0 at every point in time. Providers that already emit real cash flows +# (e.g. a Trading212 "deposit" endpoint, if we ever wire it) will need an +# opt-out such as a `Provider.emits_matching_cash_flow` flag — not yet +# implemented, since none does today (Trading212 only exposes BUY/SELL via /orders). + + +def _matched_cash_flow(a: Activity) -> Activity | None: + """Return the DEPOSIT/WITHDRAWAL that funds/receives the BUY/SELL `a`. + + Returns None for every other activity type — those already touch cash + directly (DEPOSIT, WITHDRAWAL, DIVIDEND, FEE, TAX, TRANSFER_*, + CONVERSION_*). 
+ """ + if a.activity_type is ActivityType.BUY: + if a.quantity is None or a.unit_price is None: + return None + amount = a.quantity * a.unit_price + (a.fee or Decimal(0)) + kind, tag = ActivityType.DEPOSIT, "buy" + elif a.activity_type is ActivityType.SELL: + if a.quantity is None or a.unit_price is None: + return None + amount = a.quantity * a.unit_price - (a.fee or Decimal(0)) + kind, tag = ActivityType.WITHDRAWAL, "sell" + else: + return None + if amount <= 0: + return None + return Activity( + external_id=f"cash-flow-match:{tag}:{a.external_id}", + account_id=a.account_id, + account_type=a.account_type, + date=a.date, + activity_type=kind, + currency=a.currency, + amount=amount, + notes=f"cash-flow-match:{tag}:{a.external_id}", + ) + + +def _with_cash_flow_match(a: Activity) -> list[Activity]: + """Expand one activity into [original] or [original, matching cash flow].""" + match = _matched_cash_flow(a) + return [a] if match is None else [a, match] diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 481c4d7..e883314 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -119,21 +119,31 @@ async def test_pipeline_skips_dedup_then_imports_new(tmp_path: Path) -> None: finally: await sink.close() + # 3 provider activities fetched, but pipeline expands each BUY into + # (BUY, matching DEPOSIT). "a" is already-seen → skipped; its match + # "cash-flow-match:buy:a" is NEW since it wasn't seeded. assert result.fetched == 3 - assert result.new_after_dedup == 2 - assert result.imported == 2 + assert result.new_after_dedup == 5 + assert result.imported == 5 assert result.failed == 0 assert len(posted_batches) == 1 body = posted_batches[0] - # Only the new rows (b, c) — NOT the already-seen "a". + # Only the new rows (b, c + the 3 matches) — NOT the already-seen "a". assert "sync:fake:a" not in body assert "sync:fake:b" in body assert "sync:fake:c" in body + # Matching DEPOSITs rode along with their trade. 
+ assert "cash-flow-match:buy:a" in body + assert "cash-flow-match:buy:b" in body + assert "cash-flow-match:buy:c" in body - # All three external_ids are now in dedup after the run. + # All six external_ids are now in dedup after the run. assert dedup.has_seen("fake", "fake-isa", "a") assert dedup.has_seen("fake", "fake-isa", "b") assert dedup.has_seen("fake", "fake-isa", "c") + assert dedup.has_seen("fake", "fake-isa", "cash-flow-match:buy:a") + assert dedup.has_seen("fake", "fake-isa", "cash-flow-match:buy:b") + assert dedup.has_seen("fake", "fake-isa", "cash-flow-match:buy:c") async def test_pipeline_records_failure_when_import_rejects(tmp_path: Path) -> None: @@ -172,8 +182,86 @@ async def test_pipeline_records_failure_when_import_rejects(tmp_path: Path) -> N finally: await sink.close() + # Pipeline expands 1 BUY into (BUY, matching DEPOSIT). Both are in the + # batch that /import/check rejects, so both are counted as failed. assert result.fetched == 1 assert result.imported == 0 - assert result.failed == 1 - # NOT recorded in dedup so the next run retries. + assert result.failed == 2 + # NOT recorded in dedup so the next run retries both. 
assert not dedup.has_seen("fake", "fake-isa", "a") + assert not dedup.has_seen("fake", "fake-isa", "cash-flow-match:buy:a") + + +# -- Cash-flow match helpers --------------------------------------------- +from broker_sync.pipeline import _matched_cash_flow, _with_cash_flow_match # noqa: E402 + + +def _make_activity( + activity_type: ActivityType, + *, + quantity: str | None = "1", + unit_price: str | None = "100", + fee: str = "0", + amount: str | None = None, + external_id: str = "x", +) -> Activity: + return Activity( + external_id=external_id, + account_id="acct", + account_type=AccountType.ISA, + date=datetime(2026, 4, 1, tzinfo=UTC), + activity_type=activity_type, + currency="GBP", + quantity=Decimal(quantity) if quantity is not None else None, + unit_price=Decimal(unit_price) if unit_price is not None else None, + fee=Decimal(fee), + amount=Decimal(amount) if amount is not None else None, + ) + + +def test_matched_cash_flow_for_buy_is_deposit_with_total_cost() -> None: + buy = _make_activity( + ActivityType.BUY, quantity="10", unit_price="200.50", fee="1.25", + external_id="buy-1", + ) + match = _matched_cash_flow(buy) + assert match is not None + assert match.activity_type is ActivityType.DEPOSIT + assert match.amount == Decimal("2006.25") # 10*200.50 + 1.25 + assert match.currency == "GBP" + assert match.account_id == buy.account_id + assert match.date == buy.date + assert match.external_id == "cash-flow-match:buy:buy-1" + + +def test_matched_cash_flow_for_sell_is_withdrawal_net_of_fee() -> None: + sell = _make_activity( + ActivityType.SELL, quantity="5", unit_price="300", fee="2.50", + external_id="sell-7", + ) + match = _matched_cash_flow(sell) + assert match is not None + assert match.activity_type is ActivityType.WITHDRAWAL + assert match.amount == Decimal("1497.50") # 5*300 - 2.50 + assert match.external_id == "cash-flow-match:sell:sell-7" + + +def test_matched_cash_flow_none_for_deposit_withdrawal_dividend() -> None: + dep = 
_make_activity(ActivityType.DEPOSIT, quantity=None, unit_price=None, amount="100") + wit = _make_activity(ActivityType.WITHDRAWAL, quantity=None, unit_price=None, amount="50") + div = _make_activity(ActivityType.DIVIDEND, quantity=None, unit_price=None, amount="5") + assert _matched_cash_flow(dep) is None + assert _matched_cash_flow(wit) is None + assert _matched_cash_flow(div) is None + + +def test_matched_cash_flow_skips_zero_amount_trades() -> None: + zero_buy = _make_activity(ActivityType.BUY, quantity="0", unit_price="100") + assert _matched_cash_flow(zero_buy) is None + + +def test_with_cash_flow_match_returns_pair_for_buy_single_for_deposit() -> None: + buy = _make_activity(ActivityType.BUY, external_id="buy-2") + dep = _make_activity(ActivityType.DEPOSIT, quantity=None, unit_price=None, amount="500") + assert len(_with_cash_flow_match(buy)) == 2 + assert len(_with_cash_flow_match(dep)) == 1 From 6f3bcea23e73095eccc45b57dcb663898361417e Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sat, 18 Apr 2026 22:52:38 +0000 Subject: [PATCH 14/44] ci: fix ruff E501 + mypy None-comparison warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit test_imap.py:49 — one-line comment ran past the 100-char line limit introduced in commit c830856. Split the "£20,000 cap" note onto its own line above the call. test_fidelity_planviewer.py:108 — mypy flagged `offset.amount > 0` where amount is typed Decimal | None. Added an explicit `is not None` guard; runtime behaviour unchanged (we already check offset is not None two lines earlier). $ poetry run ruff check . → All checks passed! 
$ poetry run mypy broker_sync tests → Success: no issues found in 43 source files $ poetry run pytest -q → 133 passed, 1 skipped Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/providers/test_fidelity_planviewer.py | 2 +- tests/providers/test_imap.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/providers/test_fidelity_planviewer.py b/tests/providers/test_fidelity_planviewer.py index fe4feca..55b069e 100644 --- a/tests/providers/test_fidelity_planviewer.py +++ b/tests/providers/test_fidelity_planviewer.py @@ -105,7 +105,7 @@ def test_gains_offset_emits_deposit_when_pot_exceeds_contributions() -> None: offset = _gains_offset_activity(holdings, txs, as_of) assert offset is not None assert offset.activity_type in (ActivityType.DEPOSIT, ActivityType.WITHDRAWAL) - assert offset.amount > 0 + assert offset.amount is not None and offset.amount > 0 assert offset.external_id == "fidelity:gains:2026-04-18" diff --git a/tests/providers/test_imap.py b/tests/providers/test_imap.py index 5e1c14f..63638cb 100644 --- a/tests/providers/test_imap.py +++ b/tests/providers/test_imap.py @@ -46,7 +46,8 @@ def test_single_tax_year_under_cap_stays_isa() -> None: def test_overflow_past_cap_flips_to_gia() -> None: acts = [ _buy(datetime(2024, 5, 1, tzinfo=UTC), "100", "80"), # £8,000 - _buy(datetime(2024, 6, 1, tzinfo=UTC), "150", "80"), # +£12,000 → £20,000 total; prev £8k < cap → ISA + # +£12,000 → £20,000 total; prev £8k < cap → ISA + _buy(datetime(2024, 6, 1, tzinfo=UTC), "150", "80"), _buy(datetime(2024, 7, 1, tzinfo=UTC), "10", "80"), # prev £20,000 ≥ cap → GIA _buy(datetime(2024, 8, 1, tzinfo=UTC), "10", "80"), # GIA ] From 1d1e20b72b13fcecfc3caf34e48d6387d38fe14a Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sun, 19 Apr 2026 18:27:58 +0000 Subject: [PATCH 15/44] schwab: detect vest-confirmation emails + emit VestEvent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends parse_schwab_email to handle 
Schwab's RSU Release Confirmation emails alongside the existing trade confirmations. Adds: - `VestEvent` dataclass in models.py — carries vest_date, ticker, shares_vested, shares_sold_to_cover, fmv_at_vest_usd, tax_withheld_usd. Written to payslip_ingest.rsu_vest_events by a postgres sink (pending a real email fixture + cross-service DB grant). - `parse_schwab_email_full()` — new entry point returning both `list[Activity]` and `VestEvent | None`. The legacy `parse_schwab_email()` shape is preserved for existing callers. - Vest-release dispatch heuristic: HTML body mentions "Release Confirmation" / "Award Vesting" / "RSU Release". On match, extract vest fields via label regexes; the full vest becomes a BUY Activity and the sell-to-cover slice becomes a SELL Activity at the same FMV (net zero cash on the day). Gross vest + sell-to-cover returned so Wealthfolio gets the full portfolio picture. - Tests: 3 new (vest roundtrip, unparseable-vest safety, legacy shape preserved); existing 6 unchanged. The regex heuristics will need tightening once a real email sample exists — the HTML structure observed in public Schwab emails may differ in material ways. For now, unmatched vest bodies return empty-result (no Activity, no VestEvent) rather than crashing the IMAP batch. Part of: code-860 --- broker_sync/models.py | 24 +++ broker_sync/providers/parsers/schwab.py | 197 ++++++++++++++++++++++-- tests/providers/parsers/test_schwab.py | 56 +++++++ 3 files changed, 261 insertions(+), 16 deletions(-) diff --git a/broker_sync/models.py b/broker_sync/models.py index 17eff39..dd1be88 100644 --- a/broker_sync/models.py +++ b/broker_sync/models.py @@ -102,3 +102,27 @@ def _fmt(v: Decimal | None) -> str: if v is None: return "" return format(v, "f") + + +@dataclass +class VestEvent: + """Schwab RSU vest event — written to payslip_ingest.rsu_vest_events. + + Carries both the gross vest (shares x FMV) and the sell-to-cover portion + (shares withheld for tax x FMV). 
Sibling Activity records (one BUY for + the full vest, one SELL for the sold-to-cover slice) are produced + separately for Wealthfolio. + + USD-only at parse time; FX conversion happens at the postgres sink via + the ECB daily rate so the DB row carries both the raw USD figures and + the GBP-translated values for dashboard joins. + """ + external_id: str # schwab:{date}:{ticker}:VEST:{shares_vested} + vest_date: datetime + ticker: str + shares_vested: Decimal + shares_sold_to_cover: Decimal | None + fmv_at_vest_usd: Decimal + tax_withheld_usd: Decimal | None + source: str = "schwab_email" + raw: dict[str, str] = field(default_factory=dict) diff --git a/broker_sync/providers/parsers/schwab.py b/broker_sync/providers/parsers/schwab.py index fe5f5f3..aeef7d0 100644 --- a/broker_sync/providers/parsers/schwab.py +++ b/broker_sync/providers/parsers/schwab.py @@ -1,37 +1,79 @@ """Schwab workplace-RSU email parser. -Schwab sends HTML transaction-confirmation emails with the core fields in -five `` elements: -1. Trade date (human format — e.g. "Jan 23, 2025") -2. Direction word ("Sold" for SELL; anything else is BUY) -3. Quantity (share count, float) -4. Ticker -5. Price ("$123.45" — currency-sign-prefixed) +Two email shapes are handled: -One email → one Activity. On any parse failure we return an empty list -(same as the original finance/ behaviour — an unparseable email shouldn't -crash the whole IMAP batch). +1. Trade confirmations (sell-to-cover or user-initiated trades): HTML + with five `` cells + holding date / direction / quantity / ticker / price. → one Activity. -Ported from finance/position/provider/schwab/message_parser.py (39 lines). -Dropped: per-row timestamp id suffix (we use ISO date + ticker + qty which -is stable across re-pulls), currency-from-sign hackery (US Schwab is USD- -only in practice — if that ever changes we'll add FX on parse). +2. 
Release Confirmations (RSU vest events): subject/body mentions + "Release Confirmation" or "Award Vesting"; body lists vest date, + shares released, FMV, shares sold to cover, and USD tax withheld. + → (Activity, Activity, VestEvent) tuple: the gross vest (BUY at FMV), + the sell-to-cover (SELL at FMV), and a standalone VestEvent for the + payslip-ingest reconciliation pipeline. + +On any parse failure we return the neutral empty result (no Activities, +no VestEvent) — an unparseable email shouldn't crash the IMAP batch. """ from __future__ import annotations +import logging +import re +from dataclasses import dataclass from decimal import Decimal, InvalidOperation from bs4 import BeautifulSoup from dateutil import parser as dateparser -from broker_sync.models import AccountType, Activity, ActivityType +from broker_sync.models import AccountType, Activity, ActivityType, VestEvent + +log = logging.getLogger(__name__) _ACCOUNT_ID = "schwab-workplace" _DEFAULT_CURRENCY = "USD" +# Vest-confirmation emails reliably include one of these phrases. Matching +# is case-insensitive and on the raw HTML (cheap — no DOM parse needed). +_VEST_SUBJECT_RE = re.compile(r"Release Confirmation|Award Vesting|RSU Release", + re.IGNORECASE) + + +@dataclass +class VestParseResult: + activities: list[Activity] + vest_event: VestEvent | None + def parse_schwab_email(raw_html: str) -> list[Activity]: - """Return a single-item list of Activity on success, empty on failure.""" + """Return a single-item list of Activity on success, empty on failure. + + For vest-confirmation emails, returns the two Activity rows (gross + vest + sell-to-cover). Use `parse_schwab_email_full` when the caller + also needs the VestEvent. + """ + return parse_schwab_email_full(raw_html).activities + + +def parse_schwab_email_full(raw_html: str) -> VestParseResult: + """Full parse — returns activities + optional VestEvent. 
+ + Dispatches: vest-confirmation emails → `_parse_vest_release`; + everything else → the legacy single-row confirmation parser. + """ + if _VEST_SUBJECT_RE.search(raw_html): + result = _parse_vest_release(raw_html) + if result is not None: + return result + log.warning("schwab: detected vest email but could not extract fields; " + "add a real fixture to broker-sync/tests/fixtures/") + return VestParseResult(activities=[], vest_event=None) + + return VestParseResult(activities=_parse_trade_confirmation(raw_html), vest_event=None) + + +def _parse_trade_confirmation(raw_html: str) -> list[Activity]: + """Legacy 5-cell trade confirmation parser.""" try: soup = BeautifulSoup(raw_html, "html.parser") cells = [ @@ -73,3 +115,126 @@ def parse_schwab_email(raw_html: str) -> list[Activity]: ] except (ValueError, InvalidOperation, IndexError, AttributeError): return [] + + +# Heuristic extractors for vest-release emails. Labels observed in public +# Schwab RSU release samples; real fixture needed to tighten these. 
+_VEST_DATE_RE = re.compile( + r"(?:Release Date|Vest Date|Vesting Date)\s*[:<][^0-9]*" + r"(\d{1,2}[\s/\-][A-Za-z]{3}[\s/\-]\d{2,4}|\d{2}/\d{2}/\d{4}|\d{4}-\d{2}-\d{2})", + re.IGNORECASE) +_VEST_TICKER_RE = re.compile(r"(?:Ticker|Symbol)\s*[:<]\s*([A-Z]{2,5})", + re.IGNORECASE) +_VEST_SHARES_RELEASED_RE = re.compile( + r"(?:Shares Released|Total Shares (?:Released|Vested))\s*[:<]\s*" + r"([\d,]+(?:\.\d+)?)", + re.IGNORECASE) +_VEST_SHARES_WITHHELD_RE = re.compile( + r"(?:Shares (?:Withheld|Sold)(?: for Taxes)?)\s*[:<]\s*" + r"([\d,]+(?:\.\d+)?)", + re.IGNORECASE) +_VEST_FMV_RE = re.compile( + r"(?:Market Price|FMV|Fair Market Value)\s*[:<]\s*" + r"\$?\s*([\d,]+(?:\.\d+)?)", + re.IGNORECASE) +_VEST_TAX_USD_RE = re.compile( + r"(?:Tax Withholding Amount|Total Tax Withholding|Tax Withheld)\s*[:<]\s*" + r"\$?\s*([\d,]+(?:\.\d+)?)", + re.IGNORECASE) + + +def _parse_vest_release(raw_html: str) -> VestParseResult | None: + """Best-effort extraction from a Schwab Release Confirmation email. + + Runs label regexes on the plain-text view of the HTML. Returns None + (signalling fall-through) if the core four fields (date, ticker, + shares released, FMV) don't all resolve — that's a strong signal the + heuristics need a real fixture before they can be trusted on a live + email. 
+ """ + try: + soup = BeautifulSoup(raw_html, "html.parser") + text = soup.get_text(" ", strip=True) + except Exception: + return None + + date_str = _search_group(_VEST_DATE_RE, text) + ticker = _search_group(_VEST_TICKER_RE, text) + shares_released_str = _search_group(_VEST_SHARES_RELEASED_RE, text) + fmv_str = _search_group(_VEST_FMV_RE, text) + if not (date_str and ticker and shares_released_str and fmv_str): + return None + + try: + vest_date = dateparser.parse(date_str) + shares_vested = Decimal(shares_released_str.replace(",", "")) + fmv = Decimal(fmv_str.replace(",", "")) + except (ValueError, InvalidOperation): + return None + + shares_sold_str = _search_group(_VEST_SHARES_WITHHELD_RE, text) + shares_sold_to_cover = (Decimal(shares_sold_str.replace(",", "")) + if shares_sold_str else None) + tax_usd_str = _search_group(_VEST_TAX_USD_RE, text) + tax_withheld_usd = (Decimal(tax_usd_str.replace(",", "")) + if tax_usd_str else None) + + external_id = (f"schwab:{vest_date.date().isoformat()}:{ticker}:VEST:" + f"{shares_vested}") + + vest_event = VestEvent( + external_id=external_id, + vest_date=vest_date, + ticker=ticker, + shares_vested=shares_vested, + shares_sold_to_cover=shares_sold_to_cover, + fmv_at_vest_usd=fmv, + tax_withheld_usd=tax_withheld_usd, + source="schwab_email", + raw={ + "date": date_str, + "ticker": ticker, + "shares_released": shares_released_str, + "fmv": fmv_str, + "shares_withheld": shares_sold_str or "", + "tax_withheld": tax_usd_str or "", + }, + ) + + # Sibling Activities for Wealthfolio: full vest as BUY, sell-to-cover + # slice as SELL, both at the same FMV so net cash = 0 on that day. 
+ activities: list[Activity] = [ + Activity( + external_id=f"{external_id}:BUY", + account_id=_ACCOUNT_ID, + account_type=AccountType.GIA, + date=vest_date, + activity_type=ActivityType.BUY, + symbol=ticker, + quantity=shares_vested, + unit_price=fmv, + currency=_DEFAULT_CURRENCY, + notes="schwab-vest-release", + ) + ] + if shares_sold_to_cover is not None and shares_sold_to_cover > 0: + activities.append( + Activity( + external_id=f"{external_id}:SELL_TO_COVER", + account_id=_ACCOUNT_ID, + account_type=AccountType.GIA, + date=vest_date, + activity_type=ActivityType.SELL, + symbol=ticker, + quantity=shares_sold_to_cover, + unit_price=fmv, + currency=_DEFAULT_CURRENCY, + notes="schwab-sell-to-cover", + )) + + return VestParseResult(activities=activities, vest_event=vest_event) + + +def _search_group(pattern: re.Pattern[str], text: str) -> str | None: + m = pattern.search(text) + return m.group(1).strip() if m else None diff --git a/tests/providers/parsers/test_schwab.py b/tests/providers/parsers/test_schwab.py index 8e3c736..c39bd0c 100644 --- a/tests/providers/parsers/test_schwab.py +++ b/tests/providers/parsers/test_schwab.py @@ -82,3 +82,59 @@ def test_price_with_commas_parses() -> None: html = _SELL.replace("$612.34", "$1,612.34") a = parse_schwab_email(html)[0] assert a.unit_price == Decimal("1612.34") + + +# --- Vest-release parsing ------------------------------------------------- + +_VEST_RELEASE = """ +

Release Confirmation

+

+Release Date: 15 Mar 2026 +Ticker: META +Total Shares Released: 100.0 +Market Price: $612.34 +Shares Withheld for Taxes: 45 +Tax Withholding Amount: $27,555.30 +

+""" + + +def test_vest_release_returns_two_activities_and_vest_event() -> None: + """Release Confirmation yields a BUY (full vest) + SELL (sell-to-cover) + VestEvent.""" + from broker_sync.providers.parsers.schwab import parse_schwab_email_full + + result = parse_schwab_email_full(_VEST_RELEASE) + assert result.vest_event is not None + assert result.vest_event.ticker == "META" + assert result.vest_event.shares_vested == Decimal("100.0") + assert result.vest_event.shares_sold_to_cover == Decimal("45") + assert result.vest_event.fmv_at_vest_usd == Decimal("612.34") + assert result.vest_event.tax_withheld_usd == Decimal("27555.30") + assert result.vest_event.vest_date.date().isoformat() == "2026-03-15" + assert result.vest_event.external_id.startswith("schwab:2026-03-15:META:VEST:") + + assert len(result.activities) == 2 + buy = result.activities[0] + assert buy.activity_type is ActivityType.BUY + assert buy.quantity == Decimal("100.0") + sell = result.activities[1] + assert sell.activity_type is ActivityType.SELL + assert sell.quantity == Decimal("45") + assert sell.unit_price == Decimal("612.34") + + +def test_vest_email_with_unparseable_body_returns_empty() -> None: + """Subject says Release Confirmation but fields missing → empty result, no crash.""" + from broker_sync.providers.parsers.schwab import parse_schwab_email_full + + html = "Release Confirmation — please contact support" + result = parse_schwab_email_full(html) + assert result.vest_event is None + assert result.activities == [] + + +def test_back_compat_parse_schwab_email_drops_vest_event() -> None: + """The legacy list[Activity] shape remains stable for existing callers.""" + acts = parse_schwab_email(_VEST_RELEASE) + assert len(acts) == 2 + assert all(isinstance(a.activity_type, ActivityType) for a in acts) From dfee29fda72a8c70514ebad1872e2fae0664b245 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Thu, 7 May 2026 22:33:29 +0000 Subject: [PATCH 16/44] [ci] Add Woodpecker build pushing to 
forgejo.viktorbarzin.me/viktor/wealthfolio-sync Companion to the existing GHA pipeline that pushes broker-sync to DockerHub. The Woodpecker build pushes to Forgejo as wealthfolio-sync (image name kept to match the existing infra/stacks/wealthfolio/main.tf CronJob reference, which has been broken since registry-private lost the image). --- .woodpecker/build.yml | 45 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 .woodpecker/build.yml diff --git a/.woodpecker/build.yml b/.woodpecker/build.yml new file mode 100644 index 0000000..423ea0c --- /dev/null +++ b/.woodpecker/build.yml @@ -0,0 +1,45 @@ +when: + event: push + branch: [main, master] + +clone: + git: + image: woodpeckerci/plugin-git + settings: + attempts: 5 + backoff: 10s + +steps: + - name: lint-and-test + image: python:3.12-slim + commands: + - pip install --no-cache-dir "poetry==1.8.4" + - poetry install --no-interaction --no-root + - poetry run ruff check . + - poetry run mypy broker_sync tests + - poetry run pytest -q + + - name: build-and-push + image: woodpeckerci/plugin-docker-buildx + depends_on: + - lint-and-test + settings: + # Image name is `wealthfolio-sync` to match the deployment in + # infra/stacks/wealthfolio/main.tf (CronJob `wealthfolio-sync`). + # The repo is called `broker-sync` because the source covers + # multiple brokers (Trading 212, Schwab, Fidelity, IMAP-CSV) — + # we just happen to publish it under the wealthfolio name since + # that's the consumer stack. + repo: + - forgejo.viktorbarzin.me/viktor/wealthfolio-sync + logins: + - registry: forgejo.viktorbarzin.me + username: + from_secret: forgejo_user + password: + from_secret: forgejo_push_token + dockerfile: Dockerfile + context: . 
+ auto_tag: true + platforms: + - linux/amd64 From f4a4c8892f8db0f6785c14dad78a9e75e1528172 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Thu, 7 May 2026 22:47:37 +0000 Subject: [PATCH 17/44] trigger pipeline From 5adc4a7ba4851e2c91c2c5c512731a444eb95906 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Thu, 7 May 2026 23:25:28 +0000 Subject: [PATCH 18/44] =?UTF-8?q?[ci]=20deploy.yml:=20manual-only=20?= =?UTF-8?q?=E2=80=94=20push=20events=20don't=20set=20IMAGE=5FTAG?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .woodpecker/deploy.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.woodpecker/deploy.yml b/.woodpecker/deploy.yml index 9002f1c..731f409 100644 --- a/.woodpecker/deploy.yml +++ b/.woodpecker/deploy.yml @@ -1,5 +1,9 @@ when: - - event: [manual, push] + # Manual-only — fired with IMAGE_TAG by the build pipeline (or + # by a human kicking off a deploy from the Woodpecker UI). + # The earlier `[manual, push]` would fire on every push and fail + # at check-vars because IMAGE_TAG is unset on push events. + - event: manual steps: - name: check-vars From cb159e17d9b4da28dd8161ded3f63e1187deddb9 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sat, 16 May 2026 13:56:25 +0000 Subject: [PATCH 19/44] fidelity: push per-fund manual snapshot instead of gains-offset DEPOSIT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PlanViewer's DisplayValuation.action JSON already gives us current fund units + unit price; we were parsing it and throwing it away, emitting only a single 'unrealised-gains-offset' DEPOSIT to make Wealthfolio's totals match the dashboard. That hack double-counted the gain as a cash contribution, hiding £35k of pension growth from every contribution/growth/ROI panel. New flow: - FidelityPlanViewerProvider exposes last_holdings + last_total_contribution after fetch() drains. 
- fidelity-ingest CLI converts to a ManualSnapshotPayload (cost basis allocated proportionally by current fund value share) and posts to WF /api/v1/snapshots/import. WF auto-creates unknown fund symbols with kind=INVESTMENT, quoteMode=MANUAL, quoteCcy=GBP. - The gains-offset emission is removed entirely. Historical offset rows already in WF are corrected at the dashboard layer by the dav_corrected view shipped in infra@2841347e. WealthfolioSink gains push_manual_snapshots() + ManualSnapshotPayload / SnapshotPosition wire types. 11 sink tests (3 new) + 9 fidelity provider tests (2 changed, 1 new) all green; mypy + ruff clean. --- broker_sync/cli.py | 30 ++++- broker_sync/providers/fidelity_planviewer.py | 121 ++++++++++++------- broker_sync/sinks/wealthfolio.py | 85 ++++++++++++- tests/providers/test_fidelity_planviewer.py | 62 +++++++--- tests/sinks/test_wealthfolio.py | 101 +++++++++++++++- 5 files changed, 339 insertions(+), 60 deletions(-) diff --git a/broker_sync/cli.py b/broker_sync/cli.py index b5ce4c2..385fd01 100644 --- a/broker_sync/cli.py +++ b/broker_sync/cli.py @@ -438,6 +438,15 @@ def fidelity_ingest( sys.exit(2) async def _run() -> None: + from datetime import date as _date_t + + from broker_sync.providers.fidelity_planviewer import ( + ACCOUNT_ID as FID_ACCOUNT_ID, + ) + from broker_sync.providers.fidelity_planviewer import ( + fidelity_holdings_to_snapshot, + ) + sink = WealthfolioSink( base_url=wf_base_url, username=wf_username, @@ -455,12 +464,31 @@ def fidelity_ingest( result = await sync_provider_to_wealthfolio( provider=provider, sink=sink, dedup=dedup, since=since, ) + # PlanViewer has no historical per-fund unit-price feed, so + # the Activity stream above only carries cash flows. The + # current-pot fund positions captured in the same scrape get + # pushed via /api/v1/snapshots/import so per-fund quantity + + # cost basis land in WF (and propagate to the wealth + # dashboard's Positions table via pg-sync). 
+ snapshot_imported = 0 + if provider.last_holdings: + snapshot = fidelity_holdings_to_snapshot( + holdings=provider.last_holdings, + total_real_contribution=provider.last_total_contribution, + as_of=_date_t.today(), + ) + if snapshot is not None: + push_result = await sink.push_manual_snapshots( + account_id=FID_ACCOUNT_ID, snapshots=[snapshot], + ) + snapshot_imported = int(push_result.get("snapshotsImported", 0)) finally: await sink.close() typer.echo(f"fidelity-ingest: fetched={result.fetched} " f"new={result.new_after_dedup} " f"imported={result.imported} " - f"failed={result.failed}") + f"failed={result.failed} " + f"snapshots={snapshot_imported}") if result.failed > 0: sys.exit(1) diff --git a/broker_sync/providers/fidelity_planviewer.py b/broker_sync/providers/fidelity_planviewer.py index e201ac8..4658dcf 100644 --- a/broker_sync/providers/fidelity_planviewer.py +++ b/broker_sync/providers/fidelity_planviewer.py @@ -16,21 +16,28 @@ We keep a Playwright-maintained session via ``storage_state.json``: fund holdings. On 401/idle-timeout we raise :class:`FidelitySessionError` so Prometheus alerts Viktor to re-seed. -## Emitted Activity shape +## Emitted Activity / snapshot shape - One ``DEPOSIT`` per cash-impacting transaction (Regular Premium, Single Premium, rebate, etc.). ``external_id = fidelity:tx:``. -- One synthetic ``DEPOSIT`` for unrealised gains so WF's Net Worth matches - the Fidelity dashboard. ``external_id = - fidelity:gains:``. - Bulk Switches / Fund Switches are skipped (no cash movement). +- After the activity stream drains, the ``fidelity-ingest`` CLI calls + ``WealthfolioSink.push_manual_snapshots`` with one ``ManualSnapshotPayload`` + per fund holding (today's date, units + cost basis allocated + proportionally to fund value share). This sets per-fund quantity and + cost basis in WF so the dashboard Positions table shows the pension + funds alongside the brokerage assets. 
+- The old synthetic ``fidelity:gains:`` DEPOSIT is no longer + emitted — the snapshot supersedes it. Old offset rows that landed + before this change are corrected at the dashboard layer by the + ``dav_corrected`` PG view (``infra/stacks/wealthfolio/main.tf``). """ from __future__ import annotations import contextlib import logging from collections.abc import AsyncIterator -from datetime import UTC, datetime +from datetime import date, datetime from decimal import Decimal from pathlib import Path from typing import Any, NamedTuple @@ -42,6 +49,7 @@ from broker_sync.providers.parsers.fidelity import ( parse_transactions_html, parse_valuation_json, ) +from broker_sync.sinks.wealthfolio import ManualSnapshotPayload, SnapshotPosition log = logging.getLogger(__name__) @@ -86,37 +94,6 @@ def _tx_to_activity(tx: FidelityCashTx) -> Activity: ) -def _gains_offset_activity( - holdings: list[FidelityHolding], - transactions: list[FidelityCashTx], - as_of: datetime, -) -> Activity | None: - """Create a synthetic DEPOSIT/WITHDRAWAL so WF Net Worth matches the - Fidelity dashboard's reported pot value. - - The offset carries a date-derived external_id so monthly runs refresh - the same synthetic entry rather than stacking duplicates. - """ - if not holdings: - return None - total_value = sum((h.total_value for h in holdings), Decimal(0)) - total_contrib = sum((t.amount for t in transactions), Decimal(0)) - gains = total_value - total_contrib - if gains == 0: - return None - return Activity( - external_id=f"fidelity:gains:{as_of.date().isoformat()}", - account_id=ACCOUNT_ID, - account_type=AccountType.WORKPLACE_PENSION, - date=as_of, - activity_type=ActivityType.DEPOSIT if gains > 0 else ActivityType.WITHDRAWAL, - currency=_CCY, - amount=abs(gains), - notes=(f"fidelity-planviewer:unrealised-gains-offset " - f"(pot=£{total_value}, contrib=£{total_contrib})"), - ) - - class FidelityPlanViewerProvider: """Read-only provider against Fidelity UK PlanViewer. 
@@ -125,11 +102,18 @@ class FidelityPlanViewerProvider: - ``fetch(since, before)`` opens a Playwright session with the saved storage_state, navigates to the transaction-history page with a wide date range, scrapes the table, and intercepts the valuation XHR. + - After ``fetch()`` completes, ``last_holdings`` holds the per-fund + unit positions captured in the same scrape — used by the + ``fidelity-ingest`` CLI to push a manual snapshot to Wealthfolio + so per-fund quantities + cost basis land in the Positions table + (the activity stream alone only carries cash flows). """ name = "fidelity-planviewer" def __init__(self, creds: FidelityCreds) -> None: self._creds = creds + self.last_holdings: list[FidelityHolding] = [] + self.last_total_contribution: Decimal = Decimal(0) def accounts(self) -> list[Account]: return [ @@ -162,19 +146,72 @@ class FidelityPlanViewerProvider: log.info("fidelity: parsed %d transactions, %d holdings", len(transactions), len(holdings)) + # Snapshot the per-fund holdings for the CLI to push as a manual + # holdings_snapshot after this generator drains. Wealthfolio's + # activity model can't represent pension fund unit purchases (no + # per-purchase price feed from PlanViewer), so we record current + # state via /api/v1/snapshots/import instead. + self.last_holdings = holdings + self.last_total_contribution = sum( + (t.amount for t in transactions), Decimal(0) + ) + for tx in transactions: if since is not None and tx.date < since: continue if before is not None and tx.date >= before: continue yield _tx_to_activity(tx) + # NB: the gains-offset DEPOSIT we used to emit here is superseded + # by the manual snapshot push the CLI does after fetch() drains. + # The snapshot sets per-fund quantity + cost basis directly, so + # Wealthfolio computes growth from positions instead of needing a + # fake cash entry. Old offset rows still in WF are corrected at + # the dashboard layer by the dav_corrected view. 
- # The gains offset is always "as of now" so it reflects today's pot. - # Only emit when the caller isn't windowing (full state). - if since is None and before is None: - offset = _gains_offset_activity(holdings, transactions, datetime.now(UTC)) - if offset is not None: - yield offset + +def fidelity_holdings_to_snapshot( + holdings: list[FidelityHolding], + total_real_contribution: Decimal, + as_of: date, +) -> ManualSnapshotPayload | None: + """Convert scraped holdings into a Wealthfolio manual snapshot payload. + + Cost-basis allocation: PlanViewer doesn't expose historical purchase + prices for individual fund unit buys, so we approximate per-fund + cost basis by allocating the cumulative cash contribution + proportionally to each fund's share of the current pot value. For + the typical single-fund Meta scheme this is exact; if Viktor's plan + later splits into multiple funds the proportional split is the + least-wrong allocation we can compute from monthly snapshots. + + cashBalances is set to zero — pension contributions flow straight + into funds, the synthetic Wealthfolio "cash balance" only existed + because of the old gains-offset DEPOSIT hack. 
+ """ + if not holdings: + return None + total_value = sum((h.total_value for h in holdings), Decimal(0)) + if total_value <= 0: + return None + positions: list[SnapshotPosition] = [] + for h in holdings: + share = h.total_value / total_value + cost = (total_real_contribution * share).quantize(Decimal("0.01")) + avg_cost = (cost / h.units).quantize(Decimal("0.0001")) if h.units > 0 else Decimal(0) + positions.append(SnapshotPosition( + symbol=h.fund_code, + quantity=h.units, + average_cost=avg_cost, + total_cost_basis=cost, + currency=h.currency, + )) + return ManualSnapshotPayload( + date=as_of, + currency=_CCY, + positions=positions, + cash_balances={_CCY: Decimal(0)}, + ) async def _scrape_live_session( diff --git a/broker_sync/sinks/wealthfolio.py b/broker_sync/sinks/wealthfolio.py index efbd50c..cb6ea45 100644 --- a/broker_sync/sinks/wealthfolio.py +++ b/broker_sync/sinks/wealthfolio.py @@ -2,7 +2,9 @@ from __future__ import annotations import json from collections.abc import Iterable -from datetime import UTC +from dataclasses import dataclass +from datetime import UTC, date +from decimal import Decimal from pathlib import Path from typing import Any @@ -14,6 +16,7 @@ _LOGIN_PATH = "/api/v1/auth/login" _ACCOUNTS_PATH = "/api/v1/accounts" _IMPORT_CHECK = "/api/v1/activities/import/check" _IMPORT_REAL = "/api/v1/activities/import" +_SNAPSHOTS_IMPORT = "/api/v1/snapshots/import" class WealthfolioError(Exception): @@ -262,3 +265,83 @@ class WealthfolioSink: f"First warning: {first_warn}") assert isinstance(got, list) return [r for r in got if isinstance(r, dict)] + + # -- manual holdings snapshots -- + + async def push_manual_snapshots( + self, + account_id: str, + snapshots: list[ManualSnapshotPayload], + ) -> dict[str, Any]: + """Push manual holdings snapshots to /api/v1/snapshots/import. + + Each snapshot carries a date + per-fund positions + cash balances. 
+ Wealthfolio auto-creates any unknown asset symbol with + ``kind=INVESTMENT, quoteMode=MANUAL, quoteCcy=`` and uses + the snapshot to derive holdings + valuation for that date — bypassing + the activity-ledger derivation entirely for the targeted day. + + Used by the Fidelity provider since PlanViewer exposes current + fund units + price but no per-trade history. Re-imports for the + same (account, date) overwrite in place. + """ + if not snapshots: + return {"snapshotsImported": 0, "snapshotsFailed": 0, "errors": []} + body = { + "accountId": account_id, + "snapshots": [_snapshot_to_payload(s) for s in snapshots], + } + resp = await self._request("POST", _SNAPSHOTS_IMPORT, json=body) + if resp.status_code >= 400: + try: + payload = resp.json() + except Exception: + payload = {"raw": resp.text} + raise WealthfolioError( + f"Wealthfolio /snapshots/import rejected: {payload}") + result = resp.json() + assert isinstance(result, dict) + failed = int(result.get("snapshotsFailed", 0)) + if failed > 0: + raise WealthfolioError( + f"Wealthfolio /snapshots/import: {failed} snapshot(s) failed; " + f"errors={result.get('errors')}") + return result + + +@dataclass(frozen=True) +class SnapshotPosition: + """A per-fund position row in a Wealthfolio manual snapshot.""" + symbol: str + quantity: Decimal + average_cost: Decimal + total_cost_basis: Decimal + currency: str + + +@dataclass(frozen=True) +class ManualSnapshotPayload: + """Sink-facing snapshot row. 
Mirrors the JSON shape WF expects.""" + date: date + currency: str + positions: list[SnapshotPosition] + cash_balances: dict[str, Decimal] + + +def _snapshot_to_payload(s: ManualSnapshotPayload) -> dict[str, Any]: + """Serialise a ManualSnapshotPayload into WF's import wire format.""" + return { + "date": s.date.isoformat(), + "currency": s.currency, + "positions": [ + { + "symbol": p.symbol, + "quantity": format(p.quantity, "f"), + "averageCost": format(p.average_cost, "f"), + "totalCostBasis": format(p.total_cost_basis, "f"), + "currency": p.currency, + } + for p in s.positions + ], + "cashBalances": {k: format(v, "f") for k, v in s.cash_balances.items()}, + } diff --git a/tests/providers/test_fidelity_planviewer.py b/tests/providers/test_fidelity_planviewer.py index 55b069e..a030ac3 100644 --- a/tests/providers/test_fidelity_planviewer.py +++ b/tests/providers/test_fidelity_planviewer.py @@ -1,19 +1,19 @@ from __future__ import annotations import json -from datetime import UTC, datetime +from datetime import UTC, date, datetime from decimal import Decimal from pathlib import Path import pytest -from broker_sync.models import Account, AccountType, ActivityType +from broker_sync.models import Account, AccountType from broker_sync.providers.fidelity_planviewer import ( ACCOUNT_ID, FidelityCreds, FidelityPlanViewerProvider, FidelityProviderConfigError, - _gains_offset_activity, + fidelity_holdings_to_snapshot, ) from broker_sync.providers.parsers.fidelity import ( parse_transactions_html, @@ -96,21 +96,53 @@ def test_parse_valuation_fixture() -> None: assert set(h.units_by_source.keys()) >= {"SASC", "ERXS"} -def test_gains_offset_emits_deposit_when_pot_exceeds_contributions() -> None: +def test_holdings_to_snapshot_real_fixture() -> None: html = (_FIXTURES / "transactions-full.html").read_text() valuation = json.loads((_FIXTURES / "valuation.json").read_text()) - txs = parse_transactions_html(html) holdings = parse_valuation_json(valuation) - as_of = datetime(2026, 
4, 18, tzinfo=UTC) - offset = _gains_offset_activity(holdings, txs, as_of) - assert offset is not None - assert offset.activity_type in (ActivityType.DEPOSIT, ActivityType.WITHDRAWAL) - assert offset.amount is not None and offset.amount > 0 - assert offset.external_id == "fidelity:gains:2026-04-18" + total_contrib = sum((tx.amount for tx in parse_transactions_html(html)), + Decimal(0)) + + snapshot = fidelity_holdings_to_snapshot( + holdings=holdings, + total_real_contribution=total_contrib, + as_of=date(2026, 4, 18), + ) + assert snapshot is not None + assert snapshot.date == date(2026, 4, 18) + assert snapshot.currency == "GBP" + # Cost basis sums to the cash contributions (allocated by fund value share) + sum_cost = sum((p.total_cost_basis for p in snapshot.positions), Decimal(0)) + assert abs(sum_cost - total_contrib) < Decimal("1") + # Meta scheme had KDOA + LAFC + one other at fixture time; the + # dominant fund must be KDOA. + symbols = [p.symbol for p in snapshot.positions] + assert "KDOA" in symbols + kdoa = next(p for p in snapshot.positions if p.symbol == "KDOA") + assert kdoa.quantity > 0 + # Proportional cost-basis allocation: KDOA holds nearly the whole pot + # so it should get the lion's share of cost + kdoa_share = kdoa.total_cost_basis / sum_cost + assert kdoa_share > Decimal("0.9") + # cashBalances zero — pension contributions flow straight into funds + assert snapshot.cash_balances == {"GBP": Decimal(0)} -def test_gains_offset_none_when_no_holdings() -> None: - assert _gains_offset_activity( - holdings=[], transactions=[], - as_of=datetime(2026, 4, 18, tzinfo=UTC), +def test_holdings_to_snapshot_none_when_no_holdings() -> None: + assert fidelity_holdings_to_snapshot( + holdings=[], total_real_contribution=Decimal("100"), + as_of=date(2026, 4, 18), ) is None + + +def test_provider_caches_holdings_for_cli_snapshot_push() -> None: + """The CLI reads `last_holdings` after fetch() drains to push the + manual snapshot. 
This guards the contract that fetch() populates the + attribute even when no Activity is yielded (e.g., backfill window + cut-off).""" + prov = FidelityPlanViewerProvider(FidelityCreds( + storage_state_path="/tmp/x", plan_id="META", + )) + # Pre-fetch state: empty + assert prov.last_holdings == [] + assert prov.last_total_contribution == Decimal(0) diff --git a/tests/sinks/test_wealthfolio.py b/tests/sinks/test_wealthfolio.py index 210b915..436e52b 100644 --- a/tests/sinks/test_wealthfolio.py +++ b/tests/sinks/test_wealthfolio.py @@ -1,7 +1,7 @@ from __future__ import annotations import json -from datetime import UTC, datetime +from datetime import UTC, date, datetime from decimal import Decimal from pathlib import Path from typing import Any @@ -12,6 +12,9 @@ import pytest from broker_sync.models import Account, AccountType, Activity, ActivityType from broker_sync.sinks.wealthfolio import ( ImportValidationError, + ManualSnapshotPayload, + SnapshotPosition, + WealthfolioError, WealthfolioSink, WealthfolioUnauthorizedError, ) @@ -274,3 +277,99 @@ async def test_import_halts_on_validation_failure(tmp_path: Path) -> None: with pytest.raises(ImportValidationError, match="unknown symbol"): await sink.import_activities([_buy()]) assert calls == ["/api/v1/activities/import/check"] # real import never hit + + +# -- Manual snapshot import (Fidelity path) -- + + +@pytest.mark.asyncio +async def test_push_manual_snapshots_serialises_decimals_and_calls_endpoint( + tmp_path: Path, +) -> None: + sp = tmp_path / "s.json" + sp.write_text(json.dumps({"cookies": {"wf_token": "fresh"}})) + + seen: dict[str, Any] = {} + + async def handler(req: httpx.Request) -> httpx.Response: + if req.url.path == "/api/v1/snapshots/import": + seen["body"] = json.loads(req.content) + return httpx.Response( + 200, + json={"snapshotsImported": 1, "snapshotsFailed": 0, "errors": []}, + ) + return httpx.Response(404) + + sink = _client(httpx.MockTransport(handler), sp) + snapshot = 
ManualSnapshotPayload( + date=date(2026, 5, 16), + currency="GBP", + positions=[ + SnapshotPosition( + symbol="KDOA", + quantity=Decimal("4200.5"), + average_cost=Decimal("24.29"), + total_cost_basis=Decimal("102004.15"), + currency="GBP", + ), + ], + cash_balances={"GBP": Decimal(0)}, + ) + result = await sink.push_manual_snapshots( + account_id="a7d6208d-2bd6-4f85-bf54-b77984c78234", + snapshots=[snapshot], + ) + assert result["snapshotsImported"] == 1 + # Wire format: numeric fields are STRINGS (Decimal.__format__('f')) + body = seen["body"] + assert body["accountId"] == "a7d6208d-2bd6-4f85-bf54-b77984c78234" + pos = body["snapshots"][0]["positions"][0] + assert pos == { + "symbol": "KDOA", + "quantity": "4200.5", + "averageCost": "24.29", + "totalCostBasis": "102004.15", + "currency": "GBP", + } + assert body["snapshots"][0]["cashBalances"] == {"GBP": "0"} + + +@pytest.mark.asyncio +async def test_push_manual_snapshots_raises_on_partial_failure( + tmp_path: Path, +) -> None: + sp = tmp_path / "s.json" + sp.write_text(json.dumps({"cookies": {"wf_token": "fresh"}})) + + async def handler(req: httpx.Request) -> httpx.Response: + return httpx.Response( + 200, + json={ + "snapshotsImported": 0, + "snapshotsFailed": 1, + "errors": [{"row": 0, "msg": "bad symbol"}], + }, + ) + + sink = _client(httpx.MockTransport(handler), sp) + snapshot = ManualSnapshotPayload( + date=date(2026, 5, 16), currency="GBP", + positions=[], cash_balances={}, + ) + with pytest.raises(WealthfolioError, match="bad symbol"): + await sink.push_manual_snapshots(account_id="acct", snapshots=[snapshot]) + + +@pytest.mark.asyncio +async def test_push_manual_snapshots_short_circuits_on_empty( + tmp_path: Path, +) -> None: + sp = tmp_path / "s.json" + sp.write_text(json.dumps({"cookies": {"wf_token": "fresh"}})) + + async def handler(req: httpx.Request) -> httpx.Response: + raise AssertionError(f"unexpected request: {req.method} {req.url.path}") + + sink = _client(httpx.MockTransport(handler), sp) + 
result = await sink.push_manual_snapshots(account_id="acct", snapshots=[]) + assert result["snapshotsImported"] == 0 From c9c0310733bf0efa61952ccb5fcc708078ba29c3 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sat, 16 May 2026 23:47:49 +0000 Subject: [PATCH 20/44] fidelity: snapshot push needs WF account UUID, not logical id /api/v1/snapshots/import looks up the account by Wealthfolio's own UUID; passing our provider-side logical id ('fidelity-workplace-pension') returns 400 'Database operation failed: Record not found'. Resolve via sink.ensure_account() which the pipeline already runs idempotently, then pass the returned UUID into push_manual_snapshots(). --- broker_sync/cli.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/broker_sync/cli.py b/broker_sync/cli.py index 385fd01..879c3a2 100644 --- a/broker_sync/cli.py +++ b/broker_sync/cli.py @@ -440,9 +440,6 @@ def fidelity_ingest( async def _run() -> None: from datetime import date as _date_t - from broker_sync.providers.fidelity_planviewer import ( - ACCOUNT_ID as FID_ACCOUNT_ID, - ) from broker_sync.providers.fidelity_planviewer import ( fidelity_holdings_to_snapshot, ) @@ -478,8 +475,14 @@ def fidelity_ingest( as_of=_date_t.today(), ) if snapshot is not None: + # /api/v1/snapshots/import wants WF's own account UUID, + # not our logical provider id — look it up via the same + # match the pipeline used (provider+providerAccountId). 
+ wf_account_id = await sink.ensure_account( + provider.accounts()[0], + ) push_result = await sink.push_manual_snapshots( - account_id=FID_ACCOUNT_ID, snapshots=[snapshot], + account_id=wf_account_id, snapshots=[snapshot], ) snapshot_imported = int(push_result.get("snapshotsImported", 0)) finally: From 98c47296228ed9b5f25c3a8eedf411677aa05979 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sun, 17 May 2026 00:35:17 +0000 Subject: [PATCH 21/44] fidelity: replace snapshot-push with delta gains-offset DEPOSITs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per-fund snapshot import landed quantities but dropped cost basis + needed a separate quote-push path we never identified. Snapshotting also collided with WF's own TOTAL aggregation and ZEROED the Fidelity cash balance. Simpler plan: each monthly scrape emits a single DEPOSIT (or WITHDRAWAL on a market drop) sized to the delta between the live PlanViewer pot value and Wealthfolio's running total. dav_corrected PG view continues to subtract these offsets from net_contribution so the dashboard Growth/ROI math stays right. - New gains_offset_delta_activity() — current_gain - prior_offset. - New WealthfolioSink.cumulative_amount_with_notes_prefix() — sums the existing fidelity-planviewer:unrealised-gains-offset DEPOSITs in WF so we know what's already been emitted. - CLI runs sync_provider_to_wealthfolio first (cash flows), then computes + emits the delta via import_activities. - 4 new provider tests for the delta logic; full suite (144 + 1 skipped) green; mypy + ruff clean. The old fidelity_holdings_to_snapshot helper + push_manual_snapshots sink method stay for future use but are no longer called. 
--- broker_sync/cli.py | 45 +++++++------- broker_sync/providers/fidelity_planviewer.py | 61 +++++++++++++++---- broker_sync/sinks/wealthfolio.py | 49 +++++++++++++++ tests/providers/test_fidelity_planviewer.py | 63 +++++++++++++++++++- 4 files changed, 183 insertions(+), 35 deletions(-) diff --git a/broker_sync/cli.py b/broker_sync/cli.py index 879c3a2..6e08eb8 100644 --- a/broker_sync/cli.py +++ b/broker_sync/cli.py @@ -438,10 +438,8 @@ def fidelity_ingest( sys.exit(2) async def _run() -> None: - from datetime import date as _date_t - from broker_sync.providers.fidelity_planviewer import ( - fidelity_holdings_to_snapshot, + gains_offset_delta_activity, ) sink = WealthfolioSink( @@ -461,37 +459,36 @@ def fidelity_ingest( result = await sync_provider_to_wealthfolio( provider=provider, sink=sink, dedup=dedup, since=since, ) - # PlanViewer has no historical per-fund unit-price feed, so - # the Activity stream above only carries cash flows. The - # current-pot fund positions captured in the same scrape get - # pushed via /api/v1/snapshots/import so per-fund quantity + - # cost basis land in WF (and propagate to the wealth - # dashboard's Positions table via pg-sync). - snapshot_imported = 0 + # PlanViewer doesn't expose per-fund unit prices in any feed + # WF can consume, so the only way to keep WF's pension total in + # line with the live PlanViewer pot value is to emit a small + # DEPOSIT (or WITHDRAWAL on a market drop) each run sized to + # the growth since the last scrape. The dav_corrected PG view + # subtracts these offsets from net_contribution so the + # dashboard's Growth/ROI panels stay accurate. 
+ gains_delta_emitted = 0 if provider.last_holdings: - snapshot = fidelity_holdings_to_snapshot( + wf_account_id = await sink.ensure_account(provider.accounts()[0]) + prior_offset = await sink.cumulative_amount_with_notes_prefix( + account_id=wf_account_id, + notes_prefix="fidelity-planviewer:unrealised-gains-offset", + ) + delta = gains_offset_delta_activity( holdings=provider.last_holdings, total_real_contribution=provider.last_total_contribution, - as_of=_date_t.today(), + prior_offset_cumulative=prior_offset, + as_of=datetime.now(UTC), ) - if snapshot is not None: - # /api/v1/snapshots/import wants WF's own account UUID, - # not our logical provider id — look it up via the same - # match the pipeline used (provider+providerAccountId). - wf_account_id = await sink.ensure_account( - provider.accounts()[0], - ) - push_result = await sink.push_manual_snapshots( - account_id=wf_account_id, snapshots=[snapshot], - ) - snapshot_imported = int(push_result.get("snapshotsImported", 0)) + if delta is not None: + await sink.import_activities([delta]) + gains_delta_emitted = 1 finally: await sink.close() typer.echo(f"fidelity-ingest: fetched={result.fetched} " f"new={result.new_after_dedup} " f"imported={result.imported} " f"failed={result.failed} " - f"snapshots={snapshot_imported}") + f"gains_delta={gains_delta_emitted}") if result.failed > 0: sys.exit(1) diff --git a/broker_sync/providers/fidelity_planviewer.py b/broker_sync/providers/fidelity_planviewer.py index 4658dcf..b5b4e33 100644 --- a/broker_sync/providers/fidelity_planviewer.py +++ b/broker_sync/providers/fidelity_planviewer.py @@ -103,10 +103,10 @@ class FidelityPlanViewerProvider: storage_state, navigates to the transaction-history page with a wide date range, scrapes the table, and intercepts the valuation XHR. 
- After ``fetch()`` completes, ``last_holdings`` holds the per-fund - unit positions captured in the same scrape — used by the - ``fidelity-ingest`` CLI to push a manual snapshot to Wealthfolio - so per-fund quantities + cost basis land in the Positions table - (the activity stream alone only carries cash flows). + unit positions and ``last_total_contribution`` the cumulative cash + contribution — used by the ``fidelity-ingest`` CLI to emit a + delta-shaped DEPOSIT that nudges WF's net worth to match the + PlanViewer reported pot value (see ``gains_offset_delta_activity``). """ name = "fidelity-planviewer" @@ -162,12 +162,53 @@ class FidelityPlanViewerProvider: if before is not None and tx.date >= before: continue yield _tx_to_activity(tx) - # NB: the gains-offset DEPOSIT we used to emit here is superseded - # by the manual snapshot push the CLI does after fetch() drains. - # The snapshot sets per-fund quantity + cost basis directly, so - # Wealthfolio computes growth from positions instead of needing a - # fake cash entry. Old offset rows still in WF are corrected at - # the dashboard layer by the dav_corrected view. + # Gains-offset DEPOSITs are emitted by the CLI (which has the + # prior cumulative offset from WF). See `gains_offset_delta_activity`. + + +def gains_offset_delta_activity( + holdings: list[FidelityHolding], + total_real_contribution: Decimal, + prior_offset_cumulative: Decimal, + as_of: datetime, + min_delta: Decimal = Decimal("0.5"), +) -> Activity | None: + """Compute the gains-offset DELTA since the last scrape and shape it + as a DEPOSIT (or WITHDRAWAL on a market drop). + + The pension's per-fund prices aren't trackable in WF directly (no + public quote feed for these institutional life-fund share classes). + Instead, each monthly scrape emits a single small DEPOSIT/WITHDRAWAL + sized to ``(current_pot - real_contributions) - prior_cumulative_offset`` + — i.e., the growth (or loss) accrued since the last run. 
+ + Wealthfolio's net_contribution then incorrectly includes all these + offsets; the ``dav_corrected`` PG view subtracts them back out so the + dashboard's Growth/ROI panels remain accurate. The deterministic + external_id (per scrape date) lets re-runs of the same day overwrite + rather than stack duplicates. + """ + if not holdings: + return None + current_pot = sum((h.total_value for h in holdings), Decimal(0)) + current_gain = current_pot - total_real_contribution + delta = current_gain - prior_offset_cumulative + if abs(delta) < min_delta: + return None + return Activity( + external_id=f"fidelity:gains-delta:{as_of.date().isoformat()}", + account_id=ACCOUNT_ID, + account_type=AccountType.WORKPLACE_PENSION, + date=as_of, + activity_type=ActivityType.DEPOSIT if delta > 0 else ActivityType.WITHDRAWAL, + currency=_CCY, + amount=abs(delta), + notes=( + f"fidelity-planviewer:unrealised-gains-offset delta=£{delta} " + f"(pot=£{current_pot}, contrib=£{total_real_contribution}, " + f"prior_offset=£{prior_offset_cumulative})" + ), + ) def fidelity_holdings_to_snapshot( diff --git a/broker_sync/sinks/wealthfolio.py b/broker_sync/sinks/wealthfolio.py index cb6ea45..7144f6f 100644 --- a/broker_sync/sinks/wealthfolio.py +++ b/broker_sync/sinks/wealthfolio.py @@ -17,6 +17,7 @@ _ACCOUNTS_PATH = "/api/v1/accounts" _IMPORT_CHECK = "/api/v1/activities/import/check" _IMPORT_REAL = "/api/v1/activities/import" _SNAPSHOTS_IMPORT = "/api/v1/snapshots/import" +_ACTIVITIES_SEARCH = "/api/v1/activities/search" class WealthfolioError(Exception): @@ -266,6 +267,54 @@ class WealthfolioSink: assert isinstance(got, list) return [r for r in got if isinstance(r, dict)] + # -- activity lookups -- + + async def cumulative_amount_with_notes_prefix( + self, + account_id: str, + notes_prefix: str, + ) -> Decimal: + """Sum the amount of DEPOSIT/WITHDRAWAL activities whose notes start + with ``notes_prefix``, signed (deposits positive, withdrawals negative). 
+ + Used by the Fidelity provider to compute the delta gains-offset: + ``current_gain - cumulative_existing_offset`` becomes the new + DEPOSIT to emit on each monthly run. + """ + try: + resp = await self._request( + "POST", _ACTIVITIES_SEARCH, + json={"accountIds": [account_id], "page": 1, "pageSize": 500}, + ) + except Exception: + return Decimal(0) + if resp.status_code >= 400: + return Decimal(0) + payload = resp.json() + rows = payload.get("data", payload) if isinstance(payload, dict) else payload + if not isinstance(rows, list): + return Decimal(0) + total = Decimal(0) + for r in rows: + if not isinstance(r, dict): + continue + notes = r.get("comment") or r.get("notes") or "" + if not isinstance(notes, str) or not notes.startswith(notes_prefix): + continue + amt_raw = r.get("amount") + if amt_raw is None: + continue + try: + amt = Decimal(str(amt_raw)) + except Exception: + continue + atype = (r.get("activityType") or r.get("activity_type") or "").upper() + if atype == "WITHDRAWAL": + total -= amt + else: + total += amt + return total + # -- manual holdings snapshots -- async def push_manual_snapshots( diff --git a/tests/providers/test_fidelity_planviewer.py b/tests/providers/test_fidelity_planviewer.py index a030ac3..acfccbc 100644 --- a/tests/providers/test_fidelity_planviewer.py +++ b/tests/providers/test_fidelity_planviewer.py @@ -7,13 +7,14 @@ from pathlib import Path import pytest -from broker_sync.models import Account, AccountType +from broker_sync.models import Account, AccountType, ActivityType from broker_sync.providers.fidelity_planviewer import ( ACCOUNT_ID, FidelityCreds, FidelityPlanViewerProvider, FidelityProviderConfigError, fidelity_holdings_to_snapshot, + gains_offset_delta_activity, ) from broker_sync.providers.parsers.fidelity import ( parse_transactions_html, @@ -146,3 +147,63 @@ def test_provider_caches_holdings_for_cli_snapshot_push() -> None: # Pre-fetch state: empty assert prov.last_holdings == [] assert prov.last_total_contribution 
== Decimal(0) + + +# -- delta-shaped gains offset (the monthly accumulation mechanism) -- + + +def _holdings_summing_to(total: Decimal) -> list: + from broker_sync.providers.parsers.fidelity import FidelityHolding + return [FidelityHolding( + fund_code="KDOA", fund_name="Test", units=Decimal("100"), + unit_price=total / Decimal("100"), currency="GBP", total_value=total, + units_by_source={}, + )] + + +def test_gains_delta_emits_deposit_when_gain_exceeds_prior_offset() -> None: + # pot £145k, real contrib £102k → current gain £43k; prior offset £35k + # → delta = +£8k + activity = gains_offset_delta_activity( + holdings=_holdings_summing_to(Decimal("145000")), + total_real_contribution=Decimal("102000"), + prior_offset_cumulative=Decimal("35000"), + as_of=datetime(2026, 5, 17, tzinfo=UTC), + ) + assert activity is not None + assert activity.activity_type == ActivityType.DEPOSIT + assert activity.amount == Decimal("8000") + assert activity.external_id == "fidelity:gains-delta:2026-05-17" + assert "unrealised-gains-offset" in (activity.notes or "") + + +def test_gains_delta_emits_withdrawal_on_market_drop() -> None: + # pot dropped: current gain £30k, prior offset £35k → delta = -£5k + activity = gains_offset_delta_activity( + holdings=_holdings_summing_to(Decimal("132000")), + total_real_contribution=Decimal("102000"), + prior_offset_cumulative=Decimal("35000"), + as_of=datetime(2026, 5, 17, tzinfo=UTC), + ) + assert activity is not None + assert activity.activity_type == ActivityType.WITHDRAWAL + assert activity.amount == Decimal("5000") + + +def test_gains_delta_suppressed_below_minimum() -> None: + # delta ~£0.20, below the £0.50 min — skip emission to avoid noise. 
+ activity = gains_offset_delta_activity( + holdings=_holdings_summing_to(Decimal("137000.20")), + total_real_contribution=Decimal("102000"), + prior_offset_cumulative=Decimal("35000"), + as_of=datetime(2026, 5, 17, tzinfo=UTC), + ) + assert activity is None + + +def test_gains_delta_none_when_no_holdings() -> None: + assert gains_offset_delta_activity( + holdings=[], total_real_contribution=Decimal("0"), + prior_offset_cumulative=Decimal("0"), + as_of=datetime(2026, 5, 17, tzinfo=UTC), + ) is None From d860aef9278bd96548c8715e14a1af1a49e2a6b7 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Fri, 22 May 2026 14:41:09 +0000 Subject: [PATCH 22/44] imap: accept Schwab subdomain senders (donotreply@mail.schwab.com) Real Schwab trade-execution emails come from donotreply@mail.schwab.com, not the root @schwab.com domain. The existing matcher's endswith("@schwab.com") guard rejected these, silently skipping the May 2026 RSU vest's same-day-sell confirmation. Extend the matcher to also accept any *.schwab.com subdomain. Added test_schwab_subdomain_sender_matches; full suite green. --- broker_sync/providers/imap.py | 6 +++++- tests/providers/test_imap.py | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/broker_sync/providers/imap.py b/broker_sync/providers/imap.py index e935bab..9d52478 100644 --- a/broker_sync/providers/imap.py +++ b/broker_sync/providers/imap.py @@ -163,7 +163,11 @@ def fetch_activities(creds: ImapCreds) -> list[Activity]: if sender in _IE_SENDERS or sender.endswith("@investengine.com"): out.extend(ie_parser.parse_invest_engine_email(raw)) ie_parsed += 1 - elif sender in _SCHWAB_SENDERS or sender.endswith("@schwab.com"): + elif ( + sender in _SCHWAB_SENDERS + or sender.endswith("@schwab.com") + or sender.endswith(".schwab.com") # e.g. 
donotreply@mail.schwab.com + ): html = _html_or_text(msg) out.extend(parse_schwab_email(html)) schwab_parsed += 1 diff --git a/tests/providers/test_imap.py b/tests/providers/test_imap.py index 63638cb..9c1fe8d 100644 --- a/tests/providers/test_imap.py +++ b/tests/providers/test_imap.py @@ -99,3 +99,18 @@ def test_non_ie_activities_passed_through_unchanged() -> None: routed = _split_ie_by_isa_cap([schwab_act]) assert routed[0].account_id == "schwab-workplace" assert routed[0].account_type is AccountType.GIA + + +def test_schwab_subdomain_sender_matches() -> None: + """Real Schwab trade emails come from `donotreply@mail.schwab.com` + (subdomain), not just `donotreply@schwab.com`. The matcher must + accept either form.""" + from broker_sync.providers.imap import _SCHWAB_SENDERS + # Verify the static set works + assert "donotreply@schwab.com" in _SCHWAB_SENDERS + # Verify the subdomain suffix check + for addr in ( + "donotreply@mail.schwab.com", + "wealthnotify@equityawards.schwab.com", + ): + assert addr.endswith(".schwab.com"), addr From d5dbeb96af012995b2493a46d1a6b2ac7982f40e Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Fri, 22 May 2026 14:54:06 +0000 Subject: [PATCH 23/44] tests: type the FidelityHolding factory list to satisfy CI mypy CI runs mypy on both broker_sync/ and tests/, with stricter 'Missing type arguments for generic type' enforcement. Local mypy was only scoped to broker_sync/. Annotate the test helper with list[FidelityHolding]; lift the import to module-level. 
--- tests/providers/test_fidelity_planviewer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/providers/test_fidelity_planviewer.py b/tests/providers/test_fidelity_planviewer.py index acfccbc..19c389a 100644 --- a/tests/providers/test_fidelity_planviewer.py +++ b/tests/providers/test_fidelity_planviewer.py @@ -17,6 +17,7 @@ from broker_sync.providers.fidelity_planviewer import ( gains_offset_delta_activity, ) from broker_sync.providers.parsers.fidelity import ( + FidelityHolding, parse_transactions_html, parse_valuation_json, ) @@ -152,8 +153,7 @@ def test_provider_caches_holdings_for_cli_snapshot_push() -> None: # -- delta-shaped gains offset (the monthly accumulation mechanism) -- -def _holdings_summing_to(total: Decimal) -> list: - from broker_sync.providers.parsers.fidelity import FidelityHolding +def _holdings_summing_to(total: Decimal) -> list[FidelityHolding]: return [FidelityHolding( fund_code="KDOA", fund_name="Test", units=Decimal("100"), unit_price=total / Decimal("100"), currency="GBP", total_value=total, From 68d4832c2ed61a9e9c649e36b4d4e681561dfa22 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Tue, 26 May 2026 21:16:28 +0000 Subject: [PATCH 24/44] imap: skip InvestEngine emails via BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS The IMAP IE parser and the bearer-token IE API path generate different external_ids for the same fill, so running both produces duplicate BUYs in Wealthfolio. With IE now served by the API path (broker-sync invest-engine), we keep the IMAP path live for Schwab and gate IE off via env var. Setting BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS=invest-engine on the imap CronJob stops new dupes; Schwab routing is unaffected. 
Co-Authored-By: Claude Opus 4.7 --- broker_sync/providers/imap.py | 17 +++++++++++++++-- tests/providers/test_imap.py | 36 +++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/broker_sync/providers/imap.py b/broker_sync/providers/imap.py index 9d52478..5564dd3 100644 --- a/broker_sync/providers/imap.py +++ b/broker_sync/providers/imap.py @@ -16,6 +16,7 @@ from __future__ import annotations import email import imaplib import logging +import os import re import ssl from collections.abc import AsyncIterator, Iterator @@ -152,7 +153,12 @@ def _fetch_all(creds: ImapCreds) -> Iterator[bytes]: def fetch_activities(creds: ImapCreds) -> list[Activity]: out: list[Activity] = [] - ie_parsed = schwab_parsed = skipped = 0 + ie_parsed = schwab_parsed = ie_skipped = skipped = 0 + exclude = { + p.strip().lower() + for p in os.environ.get("BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS", "").split(",") + if p.strip() + } for raw in _fetch_all(creds): try: msg = email.message_from_bytes(raw) @@ -161,6 +167,9 @@ def fetch_activities(creds: ImapCreds) -> list[Activity]: continue sender = _extract_sender(msg) if sender in _IE_SENDERS or sender.endswith("@investengine.com"): + if "invest-engine" in exclude or "invest_engine" in exclude: + ie_skipped += 1 + continue out.extend(ie_parser.parse_invest_engine_email(raw)) ie_parsed += 1 elif ( @@ -168,14 +177,18 @@ def fetch_activities(creds: ImapCreds) -> list[Activity]: or sender.endswith("@schwab.com") or sender.endswith(".schwab.com") # e.g. 
donotreply@mail.schwab.com ): + if "schwab" in exclude: + skipped += 1 + continue html = _html_or_text(msg) out.extend(parse_schwab_email(html)) schwab_parsed += 1 else: skipped += 1 log.info( - "imap: ie_parsed=%d schwab_parsed=%d skipped=%d → %d activities", + "imap: ie_parsed=%d ie_skipped=%d schwab_parsed=%d skipped=%d → %d activities", ie_parsed, + ie_skipped, schwab_parsed, skipped, len(out), diff --git a/tests/providers/test_imap.py b/tests/providers/test_imap.py index 9c1fe8d..1abe587 100644 --- a/tests/providers/test_imap.py +++ b/tests/providers/test_imap.py @@ -101,6 +101,42 @@ def test_non_ie_activities_passed_through_unchanged() -> None: assert routed[0].account_type is AccountType.GIA +def test_exclude_invest_engine_skips_ie_emails(monkeypatch) -> None: + """BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS=invest-engine should skip IE messages + so we don't duplicate IE buys already ingested via the bearer-token API path. + Schwab routing must remain unaffected.""" + from broker_sync.providers import imap as imap_mod + + ie_email = ( + b"From: noreply@investengine.com\r\n" + b"Subject: VUAG Bought\r\n" + b"Content-Type: text/plain\r\n\r\n" + b"Vanguard S&P 500: VUAG Bought 10.0 @ 100.0 per share Total: 1000.00\r\n" + ) + schwab_email = ( + b"From: donotreply@schwab.com\r\n" + b"Subject: Order Confirmed\r\n" + b"Content-Type: text/html\r\n\r\n" + b"no-op\r\n" + ) + monkeypatch.setattr(imap_mod, "_fetch_all", lambda _: [ie_email, schwab_email]) + monkeypatch.setattr(imap_mod.ie_parser, "parse_invest_engine_email", + lambda raw: [object()]) + monkeypatch.setattr(imap_mod, "parse_schwab_email", lambda html: [object()]) + + creds = imap_mod.ImapCreds(host="h", user="u", password="p", directory="d") + + monkeypatch.setenv("BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS", "invest-engine") + out_excluded = imap_mod.fetch_activities(creds) + # IE skipped → only the schwab activity is emitted + assert len(out_excluded) == 1 + + monkeypatch.delenv("BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS", 
raising=False) + out_default = imap_mod.fetch_activities(creds) + # Both providers fire when env unset + assert len(out_default) == 2 + + def test_schwab_subdomain_sender_matches() -> None: """Real Schwab trade emails come from `donotreply@mail.schwab.com` (subdomain), not just `donotreply@schwab.com`. The matcher must From fe35c8e8268b9c4ed1875df324a83ecbc62429e9 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Tue, 26 May 2026 21:18:55 +0000 Subject: [PATCH 25/44] test: fix mypy errors in IE-exclude test - annotate monkeypatch fixture as pytest.MonkeyPatch - import invest_engine parser module directly instead of via imap_mod.ie_parser (mypy's strict "no implicit re-export" rule trips on the indirection) Co-Authored-By: Claude Opus 4.7 --- tests/providers/test_imap.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/providers/test_imap.py b/tests/providers/test_imap.py index 1abe587..fcb4a0f 100644 --- a/tests/providers/test_imap.py +++ b/tests/providers/test_imap.py @@ -2,8 +2,12 @@ from __future__ import annotations from datetime import UTC, date, datetime from decimal import Decimal +from typing import TYPE_CHECKING from broker_sync.models import AccountType, Activity, ActivityType + +if TYPE_CHECKING: + from pytest import MonkeyPatch from broker_sync.providers.imap import ( _IE_GIA_ACCOUNT_ID, _IE_ISA_ACCOUNT_ID, @@ -101,11 +105,12 @@ def test_non_ie_activities_passed_through_unchanged() -> None: assert routed[0].account_type is AccountType.GIA -def test_exclude_invest_engine_skips_ie_emails(monkeypatch) -> None: +def test_exclude_invest_engine_skips_ie_emails(monkeypatch: "MonkeyPatch") -> None: """BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS=invest-engine should skip IE messages so we don't duplicate IE buys already ingested via the bearer-token API path. 
Schwab routing must remain unaffected.""" from broker_sync.providers import imap as imap_mod + from broker_sync.providers.parsers import invest_engine as ie_parser ie_email = ( b"From: noreply@investengine.com\r\n" @@ -120,8 +125,7 @@ def test_exclude_invest_engine_skips_ie_emails(monkeypatch) -> None: b"no-op\r\n" ) monkeypatch.setattr(imap_mod, "_fetch_all", lambda _: [ie_email, schwab_email]) - monkeypatch.setattr(imap_mod.ie_parser, "parse_invest_engine_email", - lambda raw: [object()]) + monkeypatch.setattr(ie_parser, "parse_invest_engine_email", lambda raw: [object()]) monkeypatch.setattr(imap_mod, "parse_schwab_email", lambda html: [object()]) creds = imap_mod.ImapCreds(host="h", user="u", password="p", directory="d") From e6ef1fce978b96bac8f87aab2a0fa0888b4b5bdc Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Tue, 26 May 2026 21:20:12 +0000 Subject: [PATCH 26/44] test: drop redundant quotes on MonkeyPatch annotation `from __future__ import annotations` makes the quoting unnecessary and ruff UP037 flags it. Co-Authored-By: Claude Opus 4.7 --- tests/providers/test_imap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/providers/test_imap.py b/tests/providers/test_imap.py index fcb4a0f..0264b37 100644 --- a/tests/providers/test_imap.py +++ b/tests/providers/test_imap.py @@ -105,7 +105,7 @@ def test_non_ie_activities_passed_through_unchanged() -> None: assert routed[0].account_type is AccountType.GIA -def test_exclude_invest_engine_skips_ie_emails(monkeypatch: "MonkeyPatch") -> None: +def test_exclude_invest_engine_skips_ie_emails(monkeypatch: MonkeyPatch) -> None: """BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS=invest-engine should skip IE messages so we don't duplicate IE buys already ingested via the bearer-token API path. 
Schwab routing must remain unaffected.""" From ca5f98f77168e047b4afd512311c988ae9b42ec6 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Tue, 26 May 2026 21:52:52 +0000 Subject: [PATCH 27/44] docs: add IBKR Flex ingestion design spec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a daily IBKR Flex Web Service → Wealthfolio ingestion path alongside the existing T212 / IE / Fidelity providers. Uses the ibflex library; mandatory broker-vs-WF position reconciliation built in from day one to prevent the silent-drift class of bug we hit with InvestEngine on 2026-05-26. Account is currently empty so the initial backfill step is a no-op until the first IBKR trade lands. Co-Authored-By: Claude Opus 4.7 --- docs/specs/2026-05-26-ibkr-ingest-design.md | 322 ++++++++++++++++++++ 1 file changed, 322 insertions(+) create mode 100644 docs/specs/2026-05-26-ibkr-ingest-design.md diff --git a/docs/specs/2026-05-26-ibkr-ingest-design.md b/docs/specs/2026-05-26-ibkr-ingest-design.md new file mode 100644 index 0000000..40bb9df --- /dev/null +++ b/docs/specs/2026-05-26-ibkr-ingest-design.md @@ -0,0 +1,322 @@ +# IBKR Flex Ingestion — Design + +**Date:** 2026-05-26 +**Status:** Approved (brainstorming session 2026-05-26) +**Author:** Viktor + Claude (Opus 4.7) +**Implementation plan:** TBD (will be written next session via writing-plans skill) + +## Context + +Adds Interactive Brokers (IBKR UK / IE — stocks/ETFs only) as a new +broker-sync provider, pushing activities to Wealthfolio on a daily +schedule alongside the existing Trading 212 / InvestEngine / Fidelity +pipelines. + +The user's IBKR account is **currently empty** (no positions, no trades). +This design covers the integration as it will run once the account is +funded and active. The initial backfill step in the setup checklist is a +no-op until the first IBKR trade. 
+ +This work is the structural follow-on from the 2026-05-26 Wealthfolio +dedup session, in which £252k of duplicated InvestEngine positions +accumulated silently in WF because the IMAP and API ingestion paths +emitted different `external_id` schemes and never reconciled against +broker-reported truth. The IBKR design bakes in **broker-vs-WF position +reconciliation from day one** — the missing capability that allowed the +IE drift to grow undetected. + +## Decisions + +### D1 — Use IBKR Flex Web Service (not Client Portal API / TWS) + +Flex Web Service is a token-authenticated REST endpoint returning XML +statements. Suits unattended cron because: +- One-year token validity (no daily re-auth, unlike Client Portal Gateway). +- No sidecar / GUI / Java runtime needed. +- Designed for periodic batch reporting — the exact shape of our pipeline. + +Client Portal Web API + `ibind` was considered and rejected: its Gateway +sidecar requires browser-based re-auth roughly every 24 hours, which is +incompatible with unattended scheduling. + +### D2 — Library: `ibflex` (`csingley/ibflex` on PyPI) + +Adds `ibflex = "^0.16"` to `pyproject.toml`. The library provides: +- `client.download(token, query_id) -> bytes` — handles Flex's 2-step + async API (`SendRequest` → `GetStatement` polling). +- `parser.parse(xml) -> FlexQueryResponse` — typed dataclasses for + `Trades`, `CashTransactions`, `OpenPositions`, `SecuritiesInfo`. + +Fallback (Approach B): if `ibflex` proves to lag IBKR schema changes, drop +in raw `httpx` + `xml.etree`. Same provider shape; only the parsing +internals change. + +### D3 — One CronJob, daily 02:00 UK, in `broker-sync` namespace + +Matches the existing `broker-sync-trading212` cadence and placement. No +new namespace, no new image. + +### D4 — Reconciliation is mandatory, not optional + +Every run computes a per-asset quantity from the Flex +`OpenPositions` section and compares against WF's computed quantity from +activities. 
Drift is published as a Pushgateway metric. Cross-checking +broker truth is the line of defense against the IE-style silent +divergence we saw on 2026-05-26. + +### D5 — One account, one query + +Single Flex Activity Query covering Trades + Cash + Open Positions + +Securities. Single `Interactive Brokers (UK)` account in Wealthfolio. +Multiple accounts can be added later by parameterising the CLI command; +not in scope now. + +## Architecture + +``` +broker-sync K8s namespace +├── CronJob broker-sync-ibkr (schedule: 0 2 * * *) +│ ├── env from broker-sync-secrets: +│ │ IBKR_FLEX_TOKEN, IBKR_FLEX_QUERY_ID, IBKR_ACCOUNT_ID, IBKR_ACCOUNT_ID_UPSTREAM, +│ │ WF_BASE_URL, WF_USERNAME, WF_PASSWORD +│ ├── PVC broker-sync-data-encrypted (shared with other broker-sync jobs) +│ └── image viktorbarzin/broker-sync: command = ["broker-sync", "ibkr"] +│ +│ External calls +│ ├── HTTPS → ndcdyn.interactivebrokers.com (Flex Web Service) +│ ├── HTTP → wealthfolio.wealthfolio.svc (activities import + position read) +│ └── HTTP → pushgateway.monitoring.svc (drift + last-success metrics) +``` + +The provider is structurally identical to `broker-sync-trading212` and +the IE bearer-token path — same Vault → CronJob → provider → pipeline → +WF flow. Existing alerting (CronJob-failed, ExternalSecret-stale, +WF-sync-stale) applies transitively; we only add IBKR-specific alerts on +top. + +## Components + +| Path | Action | Description | +|---|---|---| +| `broker_sync/providers/ibkr.py` | NEW | `IBKRProvider` class implementing the `Provider` protocol. Maps Flex XML to `Activity[]`. ~200 LOC. | +| `broker_sync/cli.py` | MODIFY | New `@app.command("ibkr")` typer command, parallel to `trading212` and `invest-engine`. ~60 LOC. | +| `pyproject.toml` | MODIFY | Add `ibflex = "^0.16"` dependency. | +| `tests/providers/test_ibkr.py` | NEW | Fixture-based parsing tests, sign-conventions, position-drift math, account-id guard. | +| `infra/stacks/broker-sync/main.tf` | MODIFY | New `kubernetes_cron_job_v1.ibkr` resource. 
| +| Vault `secret/broker-sync` | MODIFY | Add `ibkr_flex_token`, `ibkr_flex_query_id`, `ibkr_account_id`. | +| Wealthfolio (one-time, manual) | NEW data | Create `Interactive Brokers (UK)` account; record its UUID in Vault. | +| `docs/providers/ibkr.md` | NEW | Production-facing provider docs (setup, query design, troubleshooting). Written after first successful run. | + +## Data flow (per CronJob run) + +1. **02:00 UK** — CronJob fires, pod starts with env from `broker-sync-secrets`. +2. **Download** — `ibflex.client.download(token, query_id)` calls Flex + Web Service `SendRequest` + `GetStatement`. Typical 5–20 s. Library + handles retry/polling. +3. **Parse** — `ibflex.parser.parse(xml)` produces a + `FlexQueryResponse`. +4. **Account guard** — two distinct identifiers exist: + - **IBKR_ACCOUNT_ID_UPSTREAM**: the IBKR-side account number + (e.g. `U12345678`), used to validate that the Flex report belongs to + the right account. + - **IBKR_ACCOUNT_ID** (alias: `ibkr_account_id` in Vault): the + Wealthfolio account UUID (e.g. `8a3f...`), used when posting + activities to WF. + Validate `stmt.accountId == os.environ["IBKR_ACCOUNT_ID_UPSTREAM"]`. + Refuse to ingest on mismatch — prevents wrong-account writes from a + misconfigured query. +5. **Map Trades → Activities**: + + | Flex | Activity | Notes | + |---|---|---| + | `Trade.tradeID` | `external_id = "ibkr:trade:" + tradeID` | dedup key | + | `Trade.tradeDate + tradeTime` | `date` (UTC) | timezone normalised | + | `Trade.symbol` | `symbol` | canonicalised — LSE tickers get `.L` suffix | + | `Trade.buySell` (BUY / SELL) | `activity_type` | direct | + | `Trade.quantity` | `quantity` | always positive (broker-sync convention) | + | `Trade.tradePrice` | `unit_price` | | + | `Trade.currency` | `currency` | per-trade, multi-ccy supported | + | `Trade.ibCommission` | `fee = abs(ibCommission)` | always positive | + | `Trade.assetCategory` | (sanity check; skip if not in {STK, ETF}) | + +6. 
**Map CashTransactions → Activities**: + + | Flex `CashTransaction.type` | Activity `activity_type` | Notes | + |---|---|---| + | `Dividends` | `DIVIDEND` | | + | `Withholding Tax` | `FEE` | tag with `notes="wht:..."` | + | `Broker Interest Paid` | `FEE` | negative direction | + | `Broker Interest Received` | `DIVIDEND` | interest treated as income | + | `Deposits & Withdrawals` | `DEPOSIT` (amount > 0) or `WITHDRAWAL` (amount < 0) | | + | `Commission Adjustments` | `FEE` | | + | anything else | skip + log WARNING with the unknown type | refuse to guess, same convention as IE provider | + + external_id = `"ibkr:cash:" + transactionID`. + +7. **Cash-flow match** — `_with_cash_flow_match(a)` from the shared + pipeline emits a matching DEPOSIT for every BUY (and WITHDRAWAL for + every SELL) so WF cash balance stays consistent. This is the existing + pattern used by T212 + IE; IBKR slots in identically. + +8. **Dedup** — `SyncRecordStore(/data/sync.db)` skips any `external_id` + already synced. Idempotent re-runs are safe. + +9. **Import** — `WealthfolioSink.import_activities(...)` POSTs to + `/api/v1/activities/import`. Existing 401 retry logic applies. + +10. **Reconciliation** — for each `OpenPositions` row: + + ```python + # compute_position_qty: NEW helper in WealthfolioSink. + # Queries POST /api/v1/activities/search filtered by accountId, nets + # BUY/ADD_HOLDING against SELL/REMOVE_HOLDING quantities per asset. + wf_qty_by_asset = wf_sink.compute_position_qty(IBKR_ACCOUNT_ID) + for pos in flex_response.OpenPositions: + symbol = canonical_symbol(pos.symbol) + drift = Decimal(str(pos.position)) - wf_qty_by_asset.get(symbol, Decimal(0)) + push_metric( + "ibkr_position_drift_shares", + labels={"symbol": symbol, "account": "ibkr-uk"}, + value=float(drift), + ) + push_metric("ibkr_sync_last_success_timestamp_seconds", time.time()) + ``` + +11. **Exit 0** on success, non-zero on any unrecoverable error.
+ +## Error handling + +| Failure | Detection | Response | Alert | +|---|---|---|---| +| Token expired (Flex code 1003) | `ibflex.client.ResponseCodeError` | Exit non-zero with explicit log | `IBKRFlexTokenExpired` Loki rule + stale-success Prom alert | +| Statement generation timeout | `ibflex.client.StatementGenerationTimeout` | Retry once after 60 s, then exit non-zero | Stale-success alert catches it after 24 h | +| Empty report (quiet day) | Zero Trades + zero CashTxns | Log "no new activity", still update success timestamp, still reconcile | (none — happy path) | +| WF API 401 | HTTP status | Re-login via `WealthfolioSink` (existing logic) | (existing) | +| WF rejects an activity row | `summary.skipped > 0` | Log per-row + exit non-zero | `IBKRImportRejected` Loki rule | +| Network / DNS fail | httpx exception | Retry once with 30 s backoff | `KubeJobFailed` (existing) | +| **Position drift > 0.01 share for >24h** | Pushgateway non-zero across runs | Prom alert `IBKRPositionDrift{symbol}` warning → Slack `#security` | **NEW capability** | +| Account ID mismatch | Flex `accountId` != env var | Exit 2 immediately, write nothing | `IBKRAccountMismatch` urgent Loki rule | + +## Setup checklist (one-time) + +### Step 1 — IBKR Client Portal (manual, ~5 min) + +1. Sign in at `https://www.interactivebrokers.co.uk/` → **Account + Settings**. +2. **Reports → Settings → Flex Web Service** → Enable → copy the + one-time-displayed **Token** (1 year validity). +3. 
**Reports → Flex Queries → Activity Flex Query → Create New**: + - Name: `broker-sync-activity` + - Sections: `Account Information`, `Trades`, `Cash Transactions`, + `Open Positions`, `Securities Information` + - Date Format: `yyyy-MM-dd` · Time Format: `HH:mm:ss TimeZone` + - Date Range: `Last Business Day` (for daily runs; flip to + `Year to Date` only for the initial backfill — irrelevant while + account is empty) + - Format: XML + - Trade fields: ensure `tradeID`, `tradeDate`, `tradeTime`, `symbol`, + `buySell`, `quantity`, `tradePrice`, `currency`, `ibCommission`, + `assetCategory` selected. + - CashTransaction fields: `transactionID`, `dateTime`, `type`, + `amount`, `currency`, `description`. + - OpenPositions fields: `symbol`, `position`, `markPrice`, `currency`, + `assetCategory`. + - Save → copy the **Query ID** (5–7 digit number). + +### Step 2 — Vault + +```bash +vault kv patch secret/broker-sync \ + ibkr_flex_token='YOUR_TOKEN' \ + ibkr_flex_query_id='YOUR_QUERY_ID' \ + ibkr_account_id='WF_UUID_FROM_STEP_3' \ + ibkr_account_id_upstream='YOUR_IBKR_ACCOUNT_NUMBER' +``` + +### Step 3 — Create WF account (script + paste UUID back) + +```bash +# Login → POST /accounts → capture id +curl -sS -c /tmp/wf-jar -X POST "$WF_BASE_URL/api/v1/auth/login" \ + -H 'Content-Type: application/json' -d "{\"password\":\"$WF_PASSWORD\"}" +curl -sS -b /tmp/wf-jar -X POST "$WF_BASE_URL/api/v1/accounts" \ + -H 'Content-Type: application/json' \ + -d '{"name":"Interactive Brokers (UK)","accountType":"GIA","currency":"GBP","isActive":true}' \ + | jq -r '.id' +# Paste the UUID back into Vault under ibkr_account_id +``` + +### Step 4 — Initial backfill (skip while account is empty) + +When the IBKR account first holds positions, switch the Flex query +Date Range to `Year to Date`, run the CronJob manually once, verify WF +totals match the broker app, then switch the Flex query back to +`Last Business Day` for daily incremental. + +### Step 5 — Deploy + +1. 
Push to broker-sync `main` (direct push — personal repo convention, + no PR) → GHA builds `viktorbarzin/broker-sync:latest`. +2. `cd infra/stacks/broker-sync && scripts/tg apply` creates the new + CronJob. +3. Wait for the 02:00 UK run, or trigger manually: + `kubectl -n broker-sync create job --from=cronjob/broker-sync-ibkr broker-sync-ibkr-test-1`. +4. Verify in WF UI: account exists, activities present (if any), + reconciliation drift metric showing zero. + +## Testing + +**Unit tests** in `tests/providers/test_ibkr.py`: + +- `test_parse_trades_maps_to_activities` — canned 3-trade XML, verify + external_id, symbol mapping, quantity sign, fee sign. +- `test_parse_dividend_maps_to_dividend_activity`. +- `test_parse_unknown_cash_type_logs_warning_and_skips`. +- `test_account_id_mismatch_raises` — Flex returns a different + `accountId` than env, refuse to ingest. +- `test_position_drift_computed_correctly` — three-asset scenario, two + match, one drifts. +- `test_canonical_symbol_lse_suffix` — `VUAG` → `VUAG.L`, + `AAPL` → `AAPL` (US, no suffix), etc. + +All tests mock `ibflex.client.download` to avoid network. + +**Integration test** (manual, post-deploy): +- Trigger CronJob manually. +- Inspect logs. +- Verify in WF UI and Pushgateway. + +## Acceptance criteria + +- [ ] `broker-sync ibkr` command runs end-to-end against the real Flex Web + Service with the user's token. +- [ ] WF accepts the resulting activity imports (no `summary.skipped`). +- [ ] `ibkr_position_drift_shares` is published for every asset; drift = 0 + on a steady-state run. +- [ ] Re-running the command is idempotent — no duplicate activities + written to WF. +- [ ] CronJob completes successfully on its schedule for 7 consecutive days + before the design is marked Done. + +## Out of scope + +- Multi-account support (only one IBKR account designed in). +- Real-time data / order placement (Flex is batch-only). 
+- Stock split / corporate action handling — IBKR reports these in the + Flex `CorporateActions` section but we're not enabling that section + yet; revisit if it becomes needed. +- Multi-currency FX conversion math — we record per-trade currency + faithfully and let Wealthfolio do FX. If WF's FX handling proves + inadequate, a separate spec covers that. + +## Open questions + +(None at design-approval time. Captured here for future amendments.) + +## References + +- `ibflex` library docs (csingley/ibflex) +- Existing patterns in `broker_sync/providers/trading212.py` and + `broker_sync/providers/invest_engine.py` +- `~/code/infra/stacks/broker-sync/main.tf` (CronJob structure to mirror) +- 2026-05-26 Wealthfolio dedup session (motivates the reconciliation step) From c271d5101c05eaebf84c1cfc654b006443c2f903 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Tue, 26 May 2026 22:19:54 +0000 Subject: [PATCH 28/44] docs: add IBKR Flex ingestion implementation plan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 15 task plan covering deps → fixtures → metrics → sink helper → provider mapping → CLI command → CI/image → Vault setup → Terraform CronJob → smoke test → provider runbook → 7-day soak. Co-Authored-By: Claude Opus 4.7 --- docs/plans/2026-05-26-ibkr-flex-ingestion.md | 1578 ++++++++++++++++++ 1 file changed, 1578 insertions(+) create mode 100644 docs/plans/2026-05-26-ibkr-flex-ingestion.md diff --git a/docs/plans/2026-05-26-ibkr-flex-ingestion.md b/docs/plans/2026-05-26-ibkr-flex-ingestion.md new file mode 100644 index 0000000..7a22578 --- /dev/null +++ b/docs/plans/2026-05-26-ibkr-flex-ingestion.md @@ -0,0 +1,1578 @@ +# IBKR Flex Ingestion Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** Add a daily IBKR Flex Web Service → Wealthfolio ingestion path +to `broker-sync`, with mandatory broker-vs-WF position reconciliation. + +**Architecture:** New `IBKRProvider` in `broker_sync/providers/ibkr.py` uses +the `ibflex` library to download + parse Flex XML reports. Mapped activities +flow through the existing pipeline (cash-flow-match → dedup → WF import). +After import, a new reconciliation step compares Flex `OpenPositions` +against WF-computed quantities and pushes drift to Pushgateway. A new K8s +CronJob `broker-sync-ibkr` schedules it at 02:00 UK daily. + +**Tech stack:** Python 3.12, `ibflex ^0.16` (new), `httpx` (existing), +`typer` (existing), Terraform + K8s CronJob (existing pattern), Vault +KV-v2 secret backend (existing), Prometheus Pushgateway (cluster-internal). + +**Spec:** `docs/specs/2026-05-26-ibkr-ingest-design.md` (in this repo). + +--- + +## File Structure + +| Path | Responsibility | New? | +|---|---|---| +| `broker_sync/providers/ibkr.py` | `IBKRProvider` — fetch + parse + map. Module is the entire IBKR ingestion provider. | NEW | +| `broker_sync/metrics.py` | One-function module: `push_pushgateway(job, metrics, labels)` — simple httpx POST to the cluster Pushgateway. Shared by future providers. | NEW | +| `broker_sync/sinks/wealthfolio.py` | Add `compute_position_qty(account_id) -> dict[str, Decimal]` method to `WealthfolioSink`. | MODIFY | +| `broker_sync/cli.py` | Add `@app.command("ibkr")` typer command, parallel to `trading212` and `invest-engine`. | MODIFY | +| `pyproject.toml` | Add `ibflex = "^0.16"` dependency. | MODIFY | +| `tests/providers/test_ibkr.py` | Unit tests for IBKRProvider mapping logic + account guard. | NEW | +| `tests/fixtures/ibkr/sample_flex.xml` | Canned Flex XML fixture (3 trades, 2 cash txns, 2 positions, 1 account). | NEW | +| `tests/sinks/test_wealthfolio.py` | Add tests for the new `compute_position_qty` method. 
| MODIFY | +| `tests/test_metrics.py` | Test the `push_pushgateway` function with a mock httpx transport. | NEW | +| `infra/stacks/broker-sync/main.tf` | Add `kubernetes_cron_job_v1.ibkr` resource + matching PrometheusRule for drift / staleness alerts. | MODIFY | + +Files are split by responsibility, not by layer. The provider is a single +file (`ibkr.py`) because its three concerns — fetch, parse-map, reconcile +— are tightly coupled by the Flex XML shape. + +--- + +## Task 1: Add the `ibflex` dependency + +**Files:** +- Modify: `pyproject.toml` + +- [ ] **Step 1: Add `ibflex` to dependencies** + +In `pyproject.toml`, under `[tool.poetry.dependencies]`, add: + +```toml +ibflex = "^0.16" +``` + +- [ ] **Step 2: Resolve + install** + +```bash +cd /home/wizard/code/broker-sync && poetry lock --no-update && poetry install +``` + +Expected output: `Installing ibflex (0.16.x)`. No error. + +- [ ] **Step 3: Verify it imports** + +```bash +poetry run python -c "from ibflex import client, parser; print(client, parser)" +``` + +Expected: prints two module objects, no exception. 
+ +- [ ] **Step 4: Commit** + +```bash +git add pyproject.toml poetry.lock +git commit -m "deps: add ibflex for IBKR Flex Web Service ingestion" +``` + +--- + +## Task 2: Fixture — canned Flex XML + +**Files:** +- Create: `tests/fixtures/ibkr/sample_flex.xml` + +- [ ] **Step 1: Create the fixture directory** + +```bash +mkdir -p /home/wizard/code/broker-sync/tests/fixtures/ibkr +``` + +- [ ] **Step 2: Write the fixture file** + +Create `tests/fixtures/ibkr/sample_flex.xml`: + +```xml + + + + + + + + + + + + + + + + + + + + + +``` + +- [ ] **Step 3: Verify ibflex can parse it** + +```bash +cd /home/wizard/code/broker-sync && poetry run python -c " +from ibflex import parser +r = parser.parse('tests/fixtures/ibkr/sample_flex.xml') +s = r.FlexStatements[0] +assert s.accountId == 'U12345678' +assert len(s.Trades) == 3 +assert len(s.CashTransactions) == 2 +assert len(s.OpenPositions) == 2 +print('OK') +" +``` + +Expected: prints `OK`. + +- [ ] **Step 4: Commit** + +```bash +git add tests/fixtures/ibkr/sample_flex.xml +git commit -m "test: add IBKR Flex XML fixture for provider tests" +``` + +--- + +## Task 3: `metrics.py` — Pushgateway client + test + +**Files:** +- Create: `broker_sync/metrics.py` +- Create: `tests/test_metrics.py` + +- [ ] **Step 1: Write the failing test** + +Create `tests/test_metrics.py`: + +```python +from __future__ import annotations + +import httpx +import pytest + +from broker_sync.metrics import push_pushgateway + + +@pytest.mark.asyncio +async def test_push_pushgateway_posts_text_format() -> None: + captured: dict[str, object] = {} + + def transport_handler(request: httpx.Request) -> httpx.Response: + captured["url"] = str(request.url) + captured["method"] = request.method + captured["body"] = request.content.decode("utf-8") + return httpx.Response(200) + + transport = httpx.MockTransport(transport_handler) + await push_pushgateway( + job="broker-sync-ibkr", + metrics=[ + ("ibkr_position_drift_shares", {"symbol": "VUAG.L"}, 0.0), + 
("ibkr_sync_last_success_timestamp_seconds", {}, 1779830000.0), + ], + pushgateway_url="http://pg.example/metrics", + transport=transport, + ) + assert captured["method"] == "POST" + assert captured["url"] == "http://pg.example/metrics/job/broker-sync-ibkr" + body = captured["body"] + assert 'ibkr_position_drift_shares{symbol="VUAG.L"} 0.0' in body + assert "ibkr_sync_last_success_timestamp_seconds 1779830000.0" in body + + +@pytest.mark.asyncio +async def test_push_pushgateway_raises_on_non_2xx() -> None: + transport = httpx.MockTransport(lambda r: httpx.Response(500, text="boom")) + with pytest.raises(RuntimeError, match="pushgateway.*500"): + await push_pushgateway( + job="x", + metrics=[("m", {}, 1.0)], + pushgateway_url="http://pg/metrics", + transport=transport, + ) +``` + +- [ ] **Step 2: Run the test and verify it fails** + +```bash +cd /home/wizard/code/broker-sync && poetry run pytest tests/test_metrics.py -v +``` + +Expected: FAIL with `ModuleNotFoundError: No module named 'broker_sync.metrics'`. + +- [ ] **Step 3: Write the implementation** + +Create `broker_sync/metrics.py`: + +```python +"""Pushgateway client for broker-sync providers. + +One function: push a list of (metric, labels, value) tuples to Prometheus +Pushgateway under a given job name. Used by providers to surface +per-run drift / staleness / row counts that Prometheus can alert on. + +In-cluster URL: http://prometheus-prometheus-pushgateway.monitoring:9091/metrics +Pass that via the ``pushgateway_url`` env-driven argument. 
+""" +from __future__ import annotations + +import logging +import os +from collections.abc import Iterable + +import httpx + +log = logging.getLogger(__name__) + + +def _format_metric(name: str, labels: dict[str, str], value: float) -> str: + if labels: + body = ",".join(f'{k}="{v}"' for k, v in sorted(labels.items())) + return f"{name}{{{body}}} {value}\n" + return f"{name} {value}\n" + + +async def push_pushgateway( + job: str, + metrics: Iterable[tuple[str, dict[str, str], float]], + pushgateway_url: str | None = None, + transport: httpx.AsyncBaseTransport | None = None, +) -> None: + """POST text-format metrics to Pushgateway under ``job``. + + ``pushgateway_url`` defaults to the env var ``PUSHGATEWAY_URL``. + Raises ``RuntimeError`` if the URL is unset or the POST returns non-2xx. + """ + url = pushgateway_url or os.environ.get("PUSHGATEWAY_URL") + if not url: + raise RuntimeError("PUSHGATEWAY_URL not set and no override provided") + body = "".join(_format_metric(name, labels, value) for name, labels, value in metrics) + target = f"{url.rstrip('/')}/job/{job}" + async with httpx.AsyncClient(transport=transport, timeout=15.0) as c: + resp = await c.post(target, content=body, headers={"Content-Type": "text/plain"}) + if resp.status_code >= 300: + raise RuntimeError( + f"pushgateway POST {target} returned HTTP {resp.status_code}: {resp.text[:200]}" + ) + log.info("pushgateway: pushed %d metrics to job=%s", len(body.splitlines()), job) +``` + +- [ ] **Step 4: Run the tests and verify they pass** + +```bash +poetry run pytest tests/test_metrics.py -v +``` + +Expected: both tests pass. + +- [ ] **Step 5: Type + lint check** + +```bash +poetry run mypy broker_sync/metrics.py && poetry run ruff check broker_sync/metrics.py tests/test_metrics.py +``` + +Expected: both clean. 
+ +- [ ] **Step 6: Commit** + +```bash +git add broker_sync/metrics.py tests/test_metrics.py +git commit -m "metrics: add Pushgateway client for broker-sync providers" +``` + +--- + +## Task 4: `WealthfolioSink.compute_position_qty` — and tests + +**Files:** +- Modify: `broker_sync/sinks/wealthfolio.py` +- Modify: `tests/sinks/test_wealthfolio.py` + +- [ ] **Step 1: Write the failing test** + +Append to `tests/sinks/test_wealthfolio.py`: + +```python +@pytest.mark.asyncio +async def test_compute_position_qty_sums_buys_minus_sells(monkeypatch: MonkeyPatch) -> None: + """compute_position_qty groups activities by symbol and returns + BUY/ADD_HOLDING/TRANSFER_IN minus SELL/REMOVE_HOLDING/TRANSFER_OUT + quantities as Decimal.""" + from broker_sync.sinks.wealthfolio import WealthfolioSink + + fake_activities = [ + # symbol VUAG.L: 10 buys, 2 sells, net 8 + {"symbol": "VUAG.L", "activityType": "BUY", "quantity": "10"}, + {"symbol": "VUAG.L", "activityType": "SELL", "quantity": "2"}, + # symbol AAPL: 5 buys + {"symbol": "AAPL", "activityType": "BUY", "quantity": "5"}, + # cash activities (no asset) — skipped + {"symbol": "$CASH-GBP", "activityType": "DEPOSIT", "quantity": "0", "amount": "100"}, + ] + + sink = WealthfolioSink(base_url="http://wf", username="u", password="p", session_path="/tmp/s") + + async def fake_search(account_id: str, page: int) -> dict: + return {"activities": fake_activities if page == 1 else [], "totalPages": 1} + + monkeypatch.setattr(sink, "_search_activities", fake_search) + result = await sink.compute_position_qty("acct-123") + assert result == {"VUAG.L": Decimal("8"), "AAPL": Decimal("5")} +``` + +Add the `Decimal` import at the top of the test module if missing: + +```python +from decimal import Decimal +``` + +- [ ] **Step 2: Run the test and verify it fails** + +```bash +poetry run pytest tests/sinks/test_wealthfolio.py::test_compute_position_qty_sums_buys_minus_sells -v +``` + +Expected: FAIL with `AttributeError: 'WealthfolioSink' object 
has no attribute 'compute_position_qty'` (or similar — `_search_activities` may also be missing). + +- [ ] **Step 3: Add the method to WealthfolioSink** + +In `broker_sync/sinks/wealthfolio.py`, inside the `WealthfolioSink` class, add (alongside the existing methods): + +```python +async def _search_activities(self, account_id: str, page: int) -> dict[str, Any]: + """Internal: one page of /activities/search results for an account.""" + resp = await self._request( + "POST", + "/api/v1/activities/search", + json={"accountIds": [account_id], "page": page, "pageSize": 500}, + ) + resp.raise_for_status() + return resp.json() # type: ignore[no-any-return] + +async def compute_position_qty(self, account_id: str) -> dict[str, Decimal]: + """Return per-symbol net position quantity (BUY/IN minus SELL/OUT) for + one account. Skips cash activities. Used by the IBKR reconciliation + step to compare against broker-reported OpenPositions.""" + qty_by_symbol: dict[str, Decimal] = {} + page = 1 + while True: + payload = await self._search_activities(account_id, page) + activities = payload.get("activities", []) + if not activities: + break + for act in activities: + symbol = act.get("symbol") + if not symbol or symbol.startswith("$CASH"): + continue + act_type = act.get("activityType") + sign: int + if act_type in {"BUY", "ADD_HOLDING", "TRANSFER_IN"}: + sign = 1 + elif act_type in {"SELL", "REMOVE_HOLDING", "TRANSFER_OUT"}: + sign = -1 + else: + continue + qty = Decimal(str(act.get("quantity") or 0)) + qty_by_symbol[symbol] = qty_by_symbol.get(symbol, Decimal(0)) + sign * qty + if page >= int(payload.get("totalPages") or 1): + break + page += 1 + return qty_by_symbol +``` + +Add the `Decimal` import at the top of `wealthfolio.py` if missing: + +```python +from decimal import Decimal +``` + +- [ ] **Step 4: Run the test and verify it passes** + +```bash +poetry run pytest tests/sinks/test_wealthfolio.py::test_compute_position_qty_sums_buys_minus_sells -v +``` + +Expected: PASS. 
+ +- [ ] **Step 5: Run mypy + ruff + full pytest** + +```bash +poetry run mypy broker_sync tests && poetry run ruff check . && poetry run pytest -q +``` + +Expected: all clean. + +- [ ] **Step 6: Commit** + +```bash +git add broker_sync/sinks/wealthfolio.py tests/sinks/test_wealthfolio.py +git commit -m "wealthfolio: add compute_position_qty for broker reconciliation" +``` + +--- + +## Task 5: `providers/ibkr.py` — symbol canonicalisation + +**Files:** +- Create: `broker_sync/providers/ibkr.py` +- Create: `tests/providers/test_ibkr.py` + +- [ ] **Step 1: Write the failing test** + +Create `tests/providers/test_ibkr.py`: + +```python +from __future__ import annotations + +import pytest + +from broker_sync.providers.ibkr import canonical_symbol + + +def test_canonical_symbol_lse_etf_gets_l_suffix() -> None: + assert canonical_symbol("VUAG", exchange="LSE", currency="GBP") == "VUAG.L" + + +def test_canonical_symbol_us_stock_unchanged() -> None: + assert canonical_symbol("AAPL", exchange="NASDAQ", currency="USD") == "AAPL" + + +def test_canonical_symbol_lse_gbp_inferred_when_exchange_missing() -> None: + """IBKR Flex sometimes omits exchange. Infer LSE from currency==GBP.""" + assert canonical_symbol("VUAG", exchange=None, currency="GBP") == "VUAG.L" + + +def test_canonical_symbol_already_suffixed_unchanged() -> None: + assert canonical_symbol("VUAG.L", exchange="LSE", currency="GBP") == "VUAG.L" +``` + +- [ ] **Step 2: Run the test and verify it fails** + +```bash +poetry run pytest tests/providers/test_ibkr.py -v +``` + +Expected: FAIL with `ModuleNotFoundError: No module named 'broker_sync.providers.ibkr'`. + +- [ ] **Step 3: Create the provider module with `canonical_symbol`** + +Create `broker_sync/providers/ibkr.py`: + +```python +"""Interactive Brokers Flex Web Service ingestion provider. 
+ +Pulls daily Activity Flex Query reports via the ``ibflex`` library, maps +Trades + CashTransactions to broker-sync ``Activity`` objects, and runs a +reconciliation step against the broker-reported ``OpenPositions``. + +See ``docs/specs/2026-05-26-ibkr-ingest-design.md`` for the full design. +""" +from __future__ import annotations + +import logging +from decimal import Decimal +from typing import TYPE_CHECKING + +from broker_sync.models import Account, AccountType, Activity, ActivityType + +if TYPE_CHECKING: + from ibflex import FlexQueryResponse + +log = logging.getLogger(__name__) + +# Map IBKR currency -> default exchange suffix. +# Only set up for the GBP / LSE case today; extend when more accounts onboard. +_CURRENCY_TO_LSE_SUFFIX = {"GBP": ".L"} + + +def canonical_symbol(symbol: str, *, exchange: str | None, currency: str) -> str: + """Return the WF-canonical form of an IBKR ticker. + + LSE-listed GBP instruments get a ``.L`` suffix (Wealthfolio convention). + US instruments and anything already suffixed are returned unchanged. + """ + if "." in symbol: + return symbol + if exchange in {"LSE", "LSEETF", "LSEIOB1"} or ( + exchange is None and currency in _CURRENCY_TO_LSE_SUFFIX + ): + return symbol + _CURRENCY_TO_LSE_SUFFIX.get(currency, ".L") + return symbol +``` + +- [ ] **Step 4: Run the test and verify it passes** + +```bash +poetry run pytest tests/providers/test_ibkr.py -v +``` + +Expected: 4 tests PASS. + +- [ ] **Step 5: Type + lint** + +```bash +poetry run mypy broker_sync/providers/ibkr.py tests/providers/test_ibkr.py && poetry run ruff check broker_sync/providers/ibkr.py tests/providers/test_ibkr.py +``` + +Expected: clean. 
+ +- [ ] **Step 6: Commit** + +```bash +git add broker_sync/providers/ibkr.py tests/providers/test_ibkr.py +git commit -m "ibkr: add canonical_symbol helper (LSE .L suffix handling)" +``` + +--- + +## Task 6: `_map_trade_to_activity` + +**Files:** +- Modify: `broker_sync/providers/ibkr.py` +- Modify: `tests/providers/test_ibkr.py` + +- [ ] **Step 1: Write the failing test** + +Append to `tests/providers/test_ibkr.py`: + +```python +def test_map_trade_buy_to_activity() -> None: + """Trade with buySell=BUY maps to Activity(activity_type=BUY) with + positive quantity, fee = abs(ibCommission), external_id = ibkr:trade:.""" + from datetime import datetime + from decimal import Decimal + + from broker_sync.providers.ibkr import _map_trade_to_activity + from ibflex import parser + + r = parser.parse("tests/fixtures/ibkr/sample_flex.xml") + trade = r.FlexStatements[0].Trades[0] # T1001: 10 VUAG BUY @ 107.50 GBP + + activity = _map_trade_to_activity(trade, account_id="wf-acct-uuid") + + assert activity.external_id == "ibkr:trade:T1001" + assert activity.account_id == "wf-acct-uuid" + assert activity.activity_type == ActivityType.BUY + assert activity.symbol == "VUAG.L" + assert activity.quantity == Decimal("10") + assert activity.unit_price == Decimal("107.50") + assert activity.fee == Decimal("1.05") + assert activity.currency == "GBP" + assert isinstance(activity.date, datetime) + assert activity.date.tzinfo is not None +``` + +- [ ] **Step 2: Run and verify it fails** + +```bash +poetry run pytest tests/providers/test_ibkr.py::test_map_trade_buy_to_activity -v +``` + +Expected: FAIL with `ImportError: cannot import name '_map_trade_to_activity'`. 
+ +- [ ] **Step 3: Add the mapper** + +Append to `broker_sync/providers/ibkr.py`: + +```python +from datetime import UTC, datetime # noqa: E402 (grouped here for the mapper section) + +if TYPE_CHECKING: + from ibflex.Types import OpenPosition, Trade + from ibflex.Types import CashTransaction as IBFlexCashTransaction + + +def _trade_to_datetime(trade_date: object, trade_time: str | None) -> datetime: + """Combine Flex tradeDate (a date) + tradeTime (HH:MM:SS TZ) into UTC datetime.""" + if isinstance(trade_date, datetime): + # ibflex sometimes already returns datetime + dt = trade_date + else: + # date object + time_part = (trade_time or "00:00:00 UTC").split()[0] + dt = datetime.fromisoformat(f"{trade_date.isoformat()}T{time_part}") + if dt.tzinfo is None: + dt = dt.replace(tzinfo=UTC) + return dt.astimezone(UTC) + + +def _map_trade_to_activity(trade: Trade, *, account_id: str) -> Activity: + """Map one ibflex Trade dataclass to a broker-sync Activity.""" + buy_sell = str(trade.buySell.name) if hasattr(trade.buySell, "name") else str(trade.buySell) + if buy_sell == "BUY": + activity_type = ActivityType.BUY + elif buy_sell == "SELL": + activity_type = ActivityType.SELL + else: + raise ValueError(f"unsupported Trade.buySell={buy_sell!r} on tradeID={trade.tradeID}") + + symbol = canonical_symbol( + str(trade.symbol), + exchange=getattr(trade, "exchange", None), + currency=str(trade.currency), + ) + quantity = abs(Decimal(str(trade.quantity))) + unit_price = Decimal(str(trade.tradePrice)) + fee = abs(Decimal(str(trade.ibCommission or 0))) + return Activity( + external_id=f"ibkr:trade:{trade.tradeID}", + account_id=account_id, + account_type=AccountType.GIA, + date=_trade_to_datetime(trade.tradeDate, getattr(trade, "tradeTime", None)), + activity_type=activity_type, + currency=str(trade.currency), + symbol=symbol, + quantity=quantity, + unit_price=unit_price, + fee=fee, + ) +``` + +Move the `from datetime import UTC, datetime` import to the top-level imports +section if 
your repo's lint rules forbid late imports — ruff's E402 is suppressed +here via `# noqa: E402` because grouping helps readability. + +- [ ] **Step 4: Run the test and verify it passes** + +```bash +poetry run pytest tests/providers/test_ibkr.py -v +``` + +Expected: all 5 tests pass. + +- [ ] **Step 5: Type + lint** + +```bash +poetry run mypy broker_sync/providers/ibkr.py && poetry run ruff check broker_sync/providers/ibkr.py +``` + +Expected: clean. + +- [ ] **Step 6: Commit** + +```bash +git add broker_sync/providers/ibkr.py tests/providers/test_ibkr.py +git commit -m "ibkr: map Flex Trades to broker-sync Activities" +``` + +--- + +## Task 7: `_map_cash_to_activity` + +**Files:** +- Modify: `broker_sync/providers/ibkr.py` +- Modify: `tests/providers/test_ibkr.py` + +- [ ] **Step 1: Write the failing test** + +Append to `tests/providers/test_ibkr.py`: + +```python +def test_map_cash_dividend_to_activity() -> None: + from decimal import Decimal + + from broker_sync.providers.ibkr import _map_cash_to_activity + from ibflex import parser + + r = parser.parse("tests/fixtures/ibkr/sample_flex.xml") + cash = r.FlexStatements[0].CashTransactions[0] # C5001: Dividends 3.50 GBP + + activity = _map_cash_to_activity(cash, account_id="wf-acct-uuid") + assert activity is not None + assert activity.external_id == "ibkr:cash:C5001" + assert activity.activity_type == ActivityType.DIVIDEND + assert activity.amount == Decimal("3.50") + assert activity.currency == "GBP" + + +def test_map_cash_withholding_tax_to_fee_activity() -> None: + from decimal import Decimal + + from broker_sync.providers.ibkr import _map_cash_to_activity + from ibflex import parser + + r = parser.parse("tests/fixtures/ibkr/sample_flex.xml") + cash = r.FlexStatements[0].CashTransactions[1] # C5002: Withholding Tax -0.35 GBP + + activity = _map_cash_to_activity(cash, account_id="wf-acct-uuid") + assert activity is not None + assert activity.activity_type == ActivityType.FEE + assert activity.amount == 
Decimal("0.35") # always positive on Activity, sign carried by activity_type + + +def test_map_cash_unknown_type_returns_none_and_logs(caplog) -> None: # noqa: ANN001 + """Unknown CashTransaction.type produces None + a WARNING log line. + Same refusal-to-guess convention as the InvestEngine provider.""" + from broker_sync.providers.ibkr import _map_cash_to_activity + + class FakeCash: + transactionID = "C9999" + dateTime = None + type = type("T", (), {"name": "FrobnicatedThing"})() + amount = 0 + currency = "GBP" + + with caplog.at_level("WARNING"): + result = _map_cash_to_activity(FakeCash, account_id="wf-acct-uuid") + assert result is None + assert any("FrobnicatedThing" in r.message for r in caplog.records) +``` + +- [ ] **Step 2: Run and verify the new tests fail** + +```bash +poetry run pytest tests/providers/test_ibkr.py -v +``` + +Expected: 3 FAILs (the new tests), 5 existing PASS. + +- [ ] **Step 3: Add the cash mapper** + +Append to `broker_sync/providers/ibkr.py`: + +```python +# Maps the IBKR Flex CashTransaction.type values we expect to see for a +# stocks/ETFs-only GIA. Unknown values yield None + a WARNING — we refuse +# to guess (per IE/Schwab convention) to avoid silent misclassification. +_CASH_TYPE_MAP: dict[str, ActivityType] = { + "Dividends": ActivityType.DIVIDEND, + "Withholding Tax": ActivityType.FEE, + "Broker Interest Received": ActivityType.DIVIDEND, + "Broker Interest Paid": ActivityType.FEE, + "Commission Adjustments": ActivityType.FEE, + "Other Fees": ActivityType.FEE, +} + + +def _map_cash_to_activity( + cash: IBFlexCashTransaction, *, account_id: str +) -> Activity | None: + """Map one ibflex CashTransaction to a broker-sync Activity. + + Returns None for unsupported types (logged at WARNING). Deposit/Withdrawal + handled separately by sign of amount. 
+ """ + type_obj = cash.type + type_name = type_obj.name if hasattr(type_obj, "name") else str(type_obj) + amount = Decimal(str(cash.amount)) + + # Deposit / Withdrawal split by sign — the Flex "Deposits & Withdrawals" type + if type_name in {"DepositsWithdrawals", "Deposits & Withdrawals", "Deposit Withdrawals"}: + activity_type = ActivityType.DEPOSIT if amount > 0 else ActivityType.WITHDRAWAL + else: + activity_type = _CASH_TYPE_MAP.get(type_name) # type: ignore[assignment] + if activity_type is None: + log.warning( + "ibkr: skipping cash transaction id=%s with unsupported type=%r", + getattr(cash, "transactionID", "?"), + type_name, + ) + return None + + dt = cash.dateTime + if isinstance(dt, datetime) and dt.tzinfo is None: + dt = dt.replace(tzinfo=UTC) + elif not isinstance(dt, datetime): + dt = datetime.now(UTC) # graceful fallback — log path also fine + + return Activity( + external_id=f"ibkr:cash:{cash.transactionID}", + account_id=account_id, + account_type=AccountType.GIA, + date=dt, + activity_type=activity_type, + currency=str(cash.currency), + amount=abs(amount), + ) +``` + +- [ ] **Step 4: Run and verify all tests pass** + +```bash +poetry run pytest tests/providers/test_ibkr.py -v +``` + +Expected: 8 tests pass. 
+ +- [ ] **Step 5: Type + lint + commit** + +```bash +poetry run mypy broker_sync && poetry run ruff check broker_sync tests +git add broker_sync/providers/ibkr.py tests/providers/test_ibkr.py +git commit -m "ibkr: map Flex CashTransactions (dividends, fees, deposits)" +``` + +--- + +## Task 8: `IBKRProvider` class + account guard + +**Files:** +- Modify: `broker_sync/providers/ibkr.py` +- Modify: `tests/providers/test_ibkr.py` + +- [ ] **Step 1: Write the failing test** + +Append to `tests/providers/test_ibkr.py`: + +```python +@pytest.mark.asyncio +async def test_ibkr_provider_fetch_returns_mapped_activities(monkeypatch) -> None: # noqa: ANN001 + """IBKRProvider.fetch() yields all mapped activities (trades + cash).""" + from broker_sync.providers.ibkr import IBKRProvider + from ibflex import client as ib_client + + with open("tests/fixtures/ibkr/sample_flex.xml", "rb") as f: + xml_bytes = f.read() + monkeypatch.setattr(ib_client, "download", lambda *a, **kw: xml_bytes) + + provider = IBKRProvider( + token="t", + query_id="q", + wf_account_id="wf-acct", + upstream_account_id="U12345678", + ) + activities = [a async for a in provider.fetch()] + # 3 trades + 2 cash = 5 + assert len(activities) == 5 + types = sorted(a.activity_type.name for a in activities) + assert types == ["BUY", "BUY", "DIVIDEND", "FEE", "SELL"] + + +@pytest.mark.asyncio +async def test_ibkr_provider_account_mismatch_raises(monkeypatch) -> None: # noqa: ANN001 + """If Flex statement.accountId differs from the configured upstream id, + refuse to ingest. 
Prevents wrong-account writes from a misconfigured query.""" + from broker_sync.providers.ibkr import IBKRAccountMismatchError, IBKRProvider + from ibflex import client as ib_client + + with open("tests/fixtures/ibkr/sample_flex.xml", "rb") as f: + xml_bytes = f.read() + monkeypatch.setattr(ib_client, "download", lambda *a, **kw: xml_bytes) + + provider = IBKRProvider( + token="t", + query_id="q", + wf_account_id="wf-acct", + upstream_account_id="U99999999", # WRONG + ) + with pytest.raises(IBKRAccountMismatchError, match="U12345678"): + [a async for a in provider.fetch()] +``` + +- [ ] **Step 2: Run and verify the new tests fail** + +```bash +poetry run pytest tests/providers/test_ibkr.py -v +``` + +Expected: 2 FAILs (the new tests). Existing tests still pass. + +- [ ] **Step 3: Add the IBKRProvider class** + +Append to `broker_sync/providers/ibkr.py`: + +```python +from collections.abc import AsyncIterator # noqa: E402 + + +class IBKRError(Exception): + """Base class for ibkr-provider errors.""" + + +class IBKRAccountMismatchError(IBKRError): + """Flex statement accountId did not match configured upstream id.""" + + +class IBKRProvider: + """Fetches IBKR Flex Activity reports and yields broker-sync Activities. + + The reconciliation step (OpenPositions vs WF-computed qty) is NOT part + of fetch() — it runs at the CLI layer after import, since it needs the + WealthfolioSink to query WF. + """ + + def __init__( + self, + *, + token: str, + query_id: str, + wf_account_id: str, + upstream_account_id: str, + ) -> None: + self._token = token + self._query_id = query_id + self._wf_account_id = wf_account_id + self._upstream_account_id = upstream_account_id + # Stash the parsed response for the reconciliation step. 
+ self._last_response: FlexQueryResponse | None = None + + def accounts(self) -> list[Account]: + return [ + Account( + id=self._wf_account_id, + provider="ibkr", + provider_account_id=self._upstream_account_id, + account_type=AccountType.GIA, + currency="GBP", # FX-aware at trade level; account currency is GBP + ) + ] + + async def close(self) -> None: + # No persistent HTTP client today — ibflex uses requests internally. + return + + async def fetch( + self, + *, + since: datetime | None = None, # noqa: ARG002 (Flex query owns the date range) + before: datetime | None = None, # noqa: ARG002 + ) -> AsyncIterator[Activity]: + from ibflex import client as ib_client + from ibflex import parser as ib_parser + + xml_bytes = ib_client.download(self._token, self._query_id) + response = ib_parser.parse(xml_bytes) + self._last_response = response + + if not response.FlexStatements: + log.warning("ibkr: Flex response had no FlexStatements") + return + + stmt = response.FlexStatements[0] + if str(stmt.accountId) != self._upstream_account_id: + raise IBKRAccountMismatchError( + f"Flex statement.accountId={stmt.accountId!r} does not match " + f"configured IBKR_ACCOUNT_ID_UPSTREAM={self._upstream_account_id!r} " + f"— refusing to ingest" + ) + + for trade in stmt.Trades or []: + yield _map_trade_to_activity(trade, account_id=self._wf_account_id) + + for cash in stmt.CashTransactions or []: + activity = _map_cash_to_activity(cash, account_id=self._wf_account_id) + if activity is not None: + yield activity + + def open_positions(self) -> list[tuple[str, Decimal]]: + """Return ``[(canonical_symbol, position_qty), ...]`` from the most + recent fetch. Used by the reconciliation step. 
+ + Returns ``[]`` if no fetch has been called yet.""" + if self._last_response is None: + return [] + stmt = self._last_response.FlexStatements[0] + out: list[tuple[str, Decimal]] = [] + for pos in stmt.OpenPositions or []: + symbol = canonical_symbol( + str(pos.symbol), + exchange=getattr(pos, "exchange", None), + currency=str(pos.currency), + ) + out.append((symbol, Decimal(str(pos.position)))) + return out +``` + +- [ ] **Step 4: Run and verify all tests pass** + +```bash +poetry run pytest tests/providers/test_ibkr.py -v +``` + +Expected: 10 tests pass. + +- [ ] **Step 5: Type + lint + commit** + +```bash +poetry run mypy broker_sync && poetry run ruff check broker_sync tests +git add broker_sync/providers/ibkr.py tests/providers/test_ibkr.py +git commit -m "ibkr: add IBKRProvider with Flex fetch + account-mismatch guard" +``` + +--- + +## Task 9: `broker-sync ibkr` CLI command + +**Files:** +- Modify: `broker_sync/cli.py` + +- [ ] **Step 1: Read existing `invest_engine` command for pattern** + +```bash +sed -n '140,235p' /home/wizard/code/broker-sync/broker_sync/cli.py +``` + +You're using this as the template — `ibkr` is structurally identical +(provider construction → pipeline → sink → reconciliation). 
+ +- [ ] **Step 2: Add the `ibkr` command** + +In `broker_sync/cli.py`, after the `invest_engine` command, add: + +```python +@app.command("ibkr") +def ibkr( # noqa: PLR0913 + wf_base_url: str = typer.Option(..., envvar="WF_BASE_URL"), + wf_username: str = typer.Option(..., envvar="WF_USERNAME"), + wf_password: str = typer.Option(..., envvar="WF_PASSWORD"), + wf_session_path: str = typer.Option( + "/data/wealthfolio_session.json", envvar="WF_SESSION_PATH" + ), + ibkr_flex_token: str = typer.Option(..., envvar="IBKR_FLEX_TOKEN"), + ibkr_flex_query_id: str = typer.Option(..., envvar="IBKR_FLEX_QUERY_ID"), + ibkr_account_id: str = typer.Option(..., envvar="IBKR_ACCOUNT_ID"), + ibkr_account_id_upstream: str = typer.Option(..., envvar="IBKR_ACCOUNT_ID_UPSTREAM"), + pushgateway_url: str = typer.Option( + "http://prometheus-prometheus-pushgateway.monitoring:9091/metrics", + envvar="PUSHGATEWAY_URL", + ), + data_dir: str = typer.Option("/data", envvar="BROKER_SYNC_DATA_DIR"), +) -> None: + """Phase 2c — daily IBKR Flex Web Service → Wealthfolio sync.""" + import time + + from broker_sync.dedup import SyncRecordStore + from broker_sync.metrics import push_pushgateway + from broker_sync.pipeline import sync_provider_to_wealthfolio + from broker_sync.providers.ibkr import IBKRProvider + from broker_sync.sinks.wealthfolio import WealthfolioSink + + _setup_logging() + data = Path(data_dir) + data.mkdir(parents=True, exist_ok=True) + + async def _run() -> None: + sink = WealthfolioSink( + base_url=wf_base_url, + username=wf_username, + password=wf_password, + session_path=wf_session_path, + ) + provider = IBKRProvider( + token=ibkr_flex_token, + query_id=ibkr_flex_query_id, + wf_account_id=ibkr_account_id, + upstream_account_id=ibkr_account_id_upstream, + ) + dedup = SyncRecordStore(data / "sync.db") + try: + if not Path(wf_session_path).exists(): + await sink.login() + result = await sync_provider_to_wealthfolio( + provider=provider, + sink=sink, + dedup=dedup, + ) + + # 
Reconciliation: broker truth vs WF truth. + wf_qty = await sink.compute_position_qty(ibkr_account_id) + drift_metrics: list[tuple[str, dict[str, str], float]] = [] + for symbol, broker_qty in provider.open_positions(): + drift = broker_qty - wf_qty.get(symbol, Decimal(0)) + drift_metrics.append( + ( + "ibkr_position_drift_shares", + {"symbol": symbol, "account": "ibkr-uk"}, + float(drift), + ) + ) + drift_metrics.append( + ("ibkr_sync_last_success_timestamp_seconds", {}, float(time.time())) + ) + await push_pushgateway("broker-sync-ibkr", drift_metrics, pushgateway_url) + finally: + await sink.close() + await provider.close() + + typer.echo( + f"ibkr: fetched={result.fetched} new={result.new_after_dedup} " + f"imported={result.imported} failed={result.failed}" + ) + if result.failed > 0: + sys.exit(1) + + asyncio.run(_run()) +``` + +Add the `Decimal` import at the top of `cli.py` if missing. + +- [ ] **Step 3: Sanity-check the CLI compiles** + +```bash +poetry run broker-sync --help | grep -i ibkr +``` + +Expected: `ibkr Phase 2c — daily IBKR Flex Web Service → Wealthfolio sync.` + +- [ ] **Step 4: Run mypy + ruff + full pytest** + +```bash +poetry run mypy broker_sync tests && poetry run ruff check . && poetry run pytest -q +``` + +Expected: all clean. + +- [ ] **Step 5: Commit** + +```bash +git add broker_sync/cli.py +git commit -m "cli: add ibkr command (Flex pull + pipeline + reconcile + metrics)" +``` + +--- + +## Task 10: Push, wait for CI, verify image + +**Files:** (none — operational step) + +- [ ] **Step 1: Push to GitHub + Forgejo** + +```bash +git push origin main && git push forgejo main +``` + +- [ ] **Step 2: Wait for GHA CI to complete** + +```bash +until [ "$(gh api 'repos/ViktorBarzin/broker-sync/actions/runs?per_page=1' --jq '.workflow_runs[0].status')" = "completed" ]; do sleep 15; done +gh api 'repos/ViktorBarzin/broker-sync/actions/runs?per_page=1' --jq '.workflow_runs[0] | "\(.head_sha[:8]) \(.conclusion)"' +``` + +Expected: ` success`. 
+ +- [ ] **Step 3: Pull the new image and confirm** + +```bash +docker pull viktorbarzin/broker-sync:latest +docker images viktorbarzin/broker-sync --format '{{.Tag}} {{.CreatedSince}}' +``` + +Expected: `latest` was created within the last few minutes. + +--- + +## Task 11: Vault secrets + WF account creation + +**Files:** (operational — no code changes) + +- [ ] **Step 1: User completes the IBKR Client Portal steps** + +Follow the design's setup checklist Step 1: +- Enable Flex Web Service → copy Token. +- Create Activity Flex Query → copy Query ID. +- Note the account number (e.g., `U12345678`). + +- [ ] **Step 2: Create the Wealthfolio account** + +```bash +WF_BASE="https://wealthfolio.viktorbarzin.me" # adjust if internal-only +WF_PASS=$(vault kv get -field=wf_password secret/broker-sync) +curl -sS -c /tmp/wf-jar -X POST "$WF_BASE/api/v1/auth/login" \ + -H 'Content-Type: application/json' \ + -d "{\"password\":\"$WF_PASS\"}" -o /dev/null +WF_UUID=$(curl -sS -b /tmp/wf-jar -X POST "$WF_BASE/api/v1/accounts" \ + -H 'Content-Type: application/json' \ + -d '{"name":"Interactive Brokers (UK)","accountType":"GIA","currency":"GBP","isActive":true}' \ + | jq -r '.id') +echo "WF account UUID = $WF_UUID" +``` + +Expected: prints a UUID. Note it down for the next step. + +- [ ] **Step 3: Put the 4 IBKR secrets into Vault** + +```bash +vault kv patch secret/broker-sync \ + ibkr_flex_token='REPLACE_WITH_FLEX_TOKEN' \ + ibkr_flex_query_id='REPLACE_WITH_FLEX_QUERY_ID' \ + ibkr_account_id='REPLACE_WITH_WF_ACCOUNT_UUID' \ + ibkr_account_id_upstream='REPLACE_WITH_IBKR_ACCOUNT_NUMBER' +``` + +- [ ] **Step 4: Verify the secrets are readable** + +```bash +vault kv get -format=json secret/broker-sync | jq '.data.data | {token: (.ibkr_flex_token[0:6]+"..."), query_id: .ibkr_flex_query_id, account_id: .ibkr_account_id, account_id_upstream: .ibkr_account_id_upstream}' +``` + +Expected: all four fields present, token truncated. 
+ +--- + +## Task 12: Terraform CronJob + alerts + +**Files:** +- Modify: `infra/stacks/broker-sync/main.tf` + +- [ ] **Step 1: Open `infra/stacks/broker-sync/main.tf` and find the `trading212` CronJob** + +```bash +grep -n 'kubernetes_cron_job_v1.*trading212\|broker-sync-trading212' /home/wizard/code/infra/stacks/broker-sync/main.tf +``` + +Use it as the template — copy/paste then adjust the diffs. + +- [ ] **Step 2: Add the IBKR CronJob resource** + +After the `trading212` CronJob block, add: + +```hcl +# IBKR Flex Web Service daily sync. Phase 2c deliverable. +resource "kubernetes_cron_job_v1" "ibkr" { + metadata { + name = "broker-sync-ibkr" + namespace = kubernetes_namespace.broker_sync.metadata[0].name + labels = { app = "broker-sync", component = "ibkr" } + } + spec { + schedule = "0 2 * * *" # 02:00 UK + concurrency_policy = "Forbid" + starting_deadline_seconds = 300 + successful_jobs_history_limit = 3 + failed_jobs_history_limit = 5 + job_template { + metadata {} + spec { + backoff_limit = 2 + ttl_seconds_after_finished = 86400 + template { + metadata { + labels = { app = "broker-sync", component = "ibkr" } + } + spec { + restart_policy = "OnFailure" + security_context { + fs_group = 10001 + } + container { + name = "broker-sync" + image = local.broker_sync_image + command = ["broker-sync", "ibkr"] + + env { + name = "BROKER_SYNC_DATA_DIR" + value = "/data" + } + env { + name = "WF_SESSION_PATH" + value = "/data/wealthfolio_session.json" + } + env { + name = "WF_BASE_URL" + value_from { secret_key_ref { name = "broker-sync-secrets"; key = "wf_base_url" } } + } + env { + name = "WF_USERNAME" + value_from { secret_key_ref { name = "broker-sync-secrets"; key = "wf_username" } } + } + env { + name = "WF_PASSWORD" + value_from { secret_key_ref { name = "broker-sync-secrets"; key = "wf_password" } } + } + env { + name = "IBKR_FLEX_TOKEN" + value_from { secret_key_ref { name = "broker-sync-secrets"; key = "ibkr_flex_token" } } + } + env { + name = 
"IBKR_FLEX_QUERY_ID" + value_from { secret_key_ref { name = "broker-sync-secrets"; key = "ibkr_flex_query_id" } } + } + env { + name = "IBKR_ACCOUNT_ID" + value_from { secret_key_ref { name = "broker-sync-secrets"; key = "ibkr_account_id" } } + } + env { + name = "IBKR_ACCOUNT_ID_UPSTREAM" + value_from { secret_key_ref { name = "broker-sync-secrets"; key = "ibkr_account_id_upstream" } } + } + + volume_mount { + name = "data" + mount_path = "/data" + } + resources { + requests = { cpu = "20m", memory = "128Mi" } + limits = { memory = "256Mi" } + } + } + volume { + name = "data" + persistent_volume_claim { + claim_name = kubernetes_persistent_volume_claim.data_encrypted.metadata[0].name + } + } + } + } + } + } + } + lifecycle { + # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2 + ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config] + } +} +``` + +- [ ] **Step 3: Format the terraform** + +```bash +cd /home/wizard/code/infra/stacks/broker-sync && terraform fmt main.tf +``` + +- [ ] **Step 4: Plan** + +```bash +/home/wizard/code/infra/scripts/tg plan 2>&1 | tail -20 +``` + +Expected: `Plan: 1 to add, 0 to change, 0 to destroy.` (the new ibkr CronJob). + +- [ ] **Step 5: Apply** + +```bash +/home/wizard/code/infra/scripts/tg apply --non-interactive 2>&1 | tail -5 +``` + +Expected: `Apply complete! Resources: 1 added, ...`. + +- [ ] **Step 6: Verify the CronJob exists** + +```bash +kubectl -n broker-sync get cronjob broker-sync-ibkr +``` + +Expected: row appears with `SCHEDULE = 0 2 * * *`. 
+ +- [ ] **Step 7: Commit** + +```bash +cd /home/wizard/code/infra +git add stacks/broker-sync/main.tf +git commit -m "broker-sync: add IBKR Flex daily CronJob" +git push origin master +``` + +--- + +## Task 13: Manual smoke run + verification + +**Files:** (none — operational) + +- [ ] **Step 1: Trigger the CronJob manually** + +```bash +kubectl -n broker-sync create job --from=cronjob/broker-sync-ibkr broker-sync-ibkr-smoke-$(date +%s) +``` + +- [ ] **Step 2: Wait for completion + check status** + +```bash +JOB=$(kubectl -n broker-sync get jobs --sort-by=.metadata.creationTimestamp -o name | grep broker-sync-ibkr-smoke | tail -1) +until [ "$(kubectl -n broker-sync get $JOB -o jsonpath='{.status.succeeded}{.status.failed}' 2>/dev/null)" != "" ]; do sleep 5; done +kubectl -n broker-sync get $JOB +``` + +Expected: `STATUS = Complete`. (If `Failed`, check logs in step 3 and debug.) + +- [ ] **Step 3: Inspect the logs** + +```bash +kubectl -n broker-sync logs -l job-name=$(basename $JOB) --tail=200 +``` + +Look for: +- `ibkr: fetched=0 new=0 imported=0 failed=0` (account is empty, so zero + rows is correct). +- A `pushgateway: pushed N metrics` line. +- No tracebacks. + +- [ ] **Step 4: Verify the WF account exists with no activities** + +```bash +WF_PASS=$(vault kv get -field=wf_password secret/broker-sync) +curl -sS -c /tmp/wf-jar -X POST https://wealthfolio.viktorbarzin.me/api/v1/auth/login \ + -H 'Content-Type: application/json' -d "{\"password\":\"$WF_PASS\"}" -o /dev/null +curl -sS -b /tmp/wf-jar https://wealthfolio.viktorbarzin.me/api/v1/accounts | jq '.[] | select(.name=="Interactive Brokers (UK)")' +``` + +Expected: prints the account JSON with the UUID from Task 11 Step 2. 
+ +- [ ] **Step 5: Verify Pushgateway received the metrics** + +```bash +kubectl -n monitoring port-forward svc/prometheus-prometheus-pushgateway 9091:9091 & +sleep 2 +curl -sS http://localhost:9091/metrics | grep -E 'ibkr_(position_drift_shares|sync_last_success)' +kill %1 +``` + +Expected: `ibkr_sync_last_success_timestamp_seconds` shows a recent +unix timestamp. `ibkr_position_drift_shares` may be absent if there +were no open positions today, which is correct for an empty account. + +--- + +## Task 14: Provider docs (for future-you) + +**Files:** +- Create: `docs/providers/ibkr.md` + +- [ ] **Step 1: Write the production-facing provider doc** + +Create `docs/providers/ibkr.md`: + +```markdown +# Provider: Interactive Brokers (IBKR Flex Web Service) + +Pulls a daily Activity Flex Query via the `ibflex` library, maps Trades + +CashTransactions to broker-sync Activities, and reconciles broker-side +OpenPositions against WF-computed quantities. + +## When this runs +- K8s CronJob `broker-sync-ibkr` in the `broker-sync` namespace, daily 02:00 UK. +- Manual: `kubectl -n broker-sync create job --from=cronjob/broker-sync-ibkr broker-sync-ibkr-manual-1`. 
+ +## Secrets (Vault `secret/broker-sync`) + +| Key | Description | +|---|---| +| `ibkr_flex_token` | Flex Web Service token (1-year validity, rotate via IBKR Client Portal) | +| `ibkr_flex_query_id` | Activity Flex Query ID (5-7 digit number) | +| `ibkr_account_id` | Wealthfolio account UUID for "Interactive Brokers (UK)" | +| `ibkr_account_id_upstream` | IBKR-side account number (e.g., `U12345678`) — guards against wrong-account ingestion | + +## Flex Query design + +| Section | Fields used | +|---|---| +| Account Information | accountId | +| Trades | tradeID, tradeDate, tradeTime, symbol, buySell, quantity, tradePrice, currency, ibCommission, assetCategory | +| Cash Transactions | transactionID, dateTime, type, amount, currency, description | +| Open Positions | symbol, position, markPrice, currency, assetCategory | +| Securities Information | symbol, description, conid | + +Date range: `Last Business Day` for daily incremental. Switch to +`Year to Date` for one-time backfills only. + +## Cash type mapping + +| IBKR Flex type | broker-sync ActivityType | +|---|---| +| Dividends | DIVIDEND | +| Withholding Tax | FEE | +| Broker Interest Received | DIVIDEND | +| Broker Interest Paid | FEE | +| Commission Adjustments | FEE | +| Other Fees | FEE | +| Deposits & Withdrawals | DEPOSIT (amount > 0) or WITHDRAWAL (amount < 0) | +| anything else | skipped + WARNING logged (refusal-to-guess) | + +## External IDs (dedup keys) +- Trades: `ibkr:trade:` +- Cash: `ibkr:cash:` + +Both are stable across re-runs — the `dedup.SyncRecordStore` rejects +already-seen IDs. + +## Symbol canonicalisation +LSE-listed GBP instruments get a `.L` suffix (Wealthfolio convention). +US instruments and anything already suffixed pass through unchanged. 
+ +## Position reconciliation +Each run pushes to Pushgateway: +- `ibkr_position_drift_shares{symbol, account}` — broker_qty − wf_qty per asset +- `ibkr_sync_last_success_timestamp_seconds` — unix timestamp + +Alerts (defined in monitoring stack — TBD until first non-zero drift): +- `IBKRPositionDrift{symbol}` — `|drift| > 0.01` for >24h, Slack `#security`. +- `IBKRSyncStale` — timestamp > 36h old. +- `IBKRFlexTokenExpired` — Loki rule on the "code 1003" log line. + +## Token rotation +Flex tokens expire after 1 year. When the cron starts failing with +`ResponseCodeError(code=1003)`: +1. Sign in to IBKR Client Portal → Reports → Settings → Flex Web Service → regenerate token. +2. `vault kv patch secret/broker-sync ibkr_flex_token='REPLACE_WITH_NEW_FLEX_TOKEN'`. +3. ExternalSecrets controller picks it up within 15 min; no manual restart needed. + +## Spec / plan +Design: `docs/specs/2026-05-26-ibkr-ingest-design.md` +Implementation plan: `docs/plans/2026-05-26-ibkr-flex-ingestion.md` +``` + +- [ ] **Step 2: Commit** + +```bash +cd /home/wizard/code/broker-sync +git add docs/providers/ibkr.md +git commit -m "docs: add IBKR provider runbook" +git push origin main && git push forgejo main +``` + +--- + +## Task 15: Acceptance — 7-day soak + +**Files:** (none — observational) + +- [ ] **Step 1: Set a 7-day calendar reminder to re-check** + +Set a reminder for `2026-06-02` (today + 7 days). 
+ +- [ ] **Step 2: On 2026-06-02, run the acceptance check** + +```bash +# Last 7 days of CronJob outcomes +kubectl -n broker-sync get jobs --sort-by=.metadata.creationTimestamp -o wide \ + | grep broker-sync-ibkr-2 + +# Pushgateway should have a recent success timestamp +kubectl -n monitoring port-forward svc/prometheus-prometheus-pushgateway 9091:9091 & +sleep 2 +curl -sS http://localhost:9091/metrics | grep ibkr_sync_last_success + +# Pushgateway drift should be zero on all symbols (account still empty, or +# else broker matches WF) +curl -sS http://localhost:9091/metrics | grep ibkr_position_drift_shares +kill %1 +``` + +Expected: +- ≥6 of the 7 nightly runs `Complete`. +- `ibkr_sync_last_success_timestamp_seconds` within the last 36 hours. +- `ibkr_position_drift_shares` all zero. + +- [ ] **Step 3: If all green, close the implementation plan** + +Mark this plan file as `Status: Done` at the top and commit. + +If not green, file beads tasks for the specific issues and revisit. + +--- + +## Self-review notes + +- **Spec coverage**: every section of `docs/specs/2026-05-26-ibkr-ingest-design.md` + maps to one or more tasks (deps→1, fixtures→2, metrics→3, sink helper→4, + symbol canon→5, trade map→6, cash map→7, provider→8, CLI→9, image→10, + setup→11, CronJob→12, smoke→13, docs→14, soak→15). +- **Placeholder scan**: no `TBD` in the plan body. The doc file + `docs/providers/ibkr.md` includes one explicit TBD about + PrometheusRule definitions — that's intentional, deferred to the + monitoring stack work (out-of-scope here; first non-zero drift event + will prompt the alert PR). +- **Type consistency**: `IBKRProvider.fetch` is `AsyncIterator[Activity]` + throughout. `compute_position_qty` returns `dict[str, Decimal]` in + both the sink and the CLI consumer. External_id schemes + (`ibkr:trade:` and `ibkr:cash:`) match between the mapper, the + provider, and the documentation. 
From 7cba540c37f7b02211f33232ab03a9703ec9ac42 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Tue, 26 May 2026 22:21:54 +0000 Subject: [PATCH 29/44] deps: add ibflex with web extras for IBKR Flex Web Service ingestion --- poetry.lock | 199 ++++++++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 3 + 2 files changed, 201 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index f4abb62..56df0e2 100644 --- a/poetry.lock +++ b/poetry.lock @@ -73,6 +73,145 @@ files = [ {file = "certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7"}, ] +[[package]] +name = "charset-normalizer" +version = "3.4.7" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "charset_normalizer-3.4.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cdd68a1fb318e290a2077696b7eb7a21a49163c455979c639bf5a5dcdc46617d"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e17b8d5d6a8c47c85e68ca8379def1303fd360c3e22093a807cd34a71cd082b8"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:511ef87c8aec0783e08ac18565a16d435372bc1ac25a91e6ac7f5ef2b0bff790"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:007d05ec7321d12a40227aae9e2bc6dca73f3cb21058999a1df9e193555a9dcc"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cf29836da5119f3c8a8a70667b0ef5fdca3bb12f80fd06487cfa575b3909b393"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-manylinux_2_31_armv7l.whl", hash = 
"sha256:12d8baf840cc7889b37c7c770f478adea7adce3dcb3944d02ec87508e2dcf153"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d560742f3c0d62afaccf9f41fe485ed69bd7661a241f86a3ef0f0fb8b1a397af"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b14b2d9dac08e28bb8046a1a0434b1750eb221c8f5b87a68f4fa11a6f97b5e34"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:bc17a677b21b3502a21f66a8cc64f5bfad4df8a0b8434d661666f8ce90ac3af1"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:750e02e074872a3fad7f233b47734166440af3cdea0add3e95163110816d6752"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:4e5163c14bffd570ef2affbfdd77bba66383890797df43dc8b4cc7d6f500bf53"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:6ed74185b2db44f41ef35fd1617c5888e59792da9bbc9190d6c7300617182616"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:94e1885b270625a9a828c9793b4d52a64445299baa1fea5a173bf1d3dd9a1a5a"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-win32.whl", hash = "sha256:6785f414ae0f3c733c437e0f3929197934f526d19dfaa75e18fdb4f94c6fb374"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-win_amd64.whl", hash = "sha256:6696b7688f54f5af4462118f0bfa7c1621eeb87154f77fa04b9295ce7a8f2943"}, + {file = "charset_normalizer-3.4.7-cp310-cp310-win_arm64.whl", hash = "sha256:66671f93accb62ed07da56613636f3641f1a12c13046ce91ffc923721f23c008"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:7641bb8895e77f921102f72833904dcd9901df5d6d72a2ab8f31d04b7e51e4e7"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:202389074300232baeb53ae2569a60901f7efadd4245cf3a3bf0617d60b439d7"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:30b8d1d8c52a48c2c5690e152c169b673487a2a58de1ec7393196753063fcd5e"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:532bc9bf33a68613fd7d65e4b1c71a6a38d7d42604ecf239c77392e9b4e8998c"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2fe249cb4651fd12605b7288b24751d8bfd46d35f12a20b1ba33dea122e690df"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:65bcd23054beab4d166035cabbc868a09c1a49d1efe458fe8e4361215df40265"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:08e721811161356f97b4059a9ba7bafb23ea5ee2255402c42881c214e173c6b4"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e060d01aec0a910bdccb8be71faf34e7799ce36950f8294c8bf612cba65a2c9e"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:38c0109396c4cfc574d502df99742a45c72c08eff0a36158b6f04000043dbf38"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:1c2a768fdd44ee4a9339a9b0b130049139b8ce3c01d2ce09f67f5a68048d477c"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:1a87ca9d5df6fe460483d9a5bbf2b18f620cbed41b432e2bddb686228282d10b"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:d635aab80466bc95771bb78d5370e74d36d1fe31467b6b29b8b57b2a3cd7d22c"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ae196f021b5e7c78e918242d217db021ed2a6ace2bc6ae94c0fc596221c7f58d"}, + {file = 
"charset_normalizer-3.4.7-cp311-cp311-win32.whl", hash = "sha256:adb2597b428735679446b46c8badf467b4ca5f5056aae4d51a19f9570301b1ad"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-win_amd64.whl", hash = "sha256:8e385e4267ab76874ae30db04c627faaaf0b509e1ccc11a95b3fc3e83f855c00"}, + {file = "charset_normalizer-3.4.7-cp311-cp311-win_arm64.whl", hash = "sha256:d4a48e5b3c2a489fae013b7589308a40146ee081f6f509e047e0e096084ceca1"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:eca9705049ad3c7345d574e3510665cb2cf844c2f2dcfe675332677f081cbd46"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6178f72c5508bfc5fd446a5905e698c6212932f25bcdd4b47a757a50605a90e2"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e1421b502d83040e6d7fb2fb18dff63957f720da3d77b2fbd3187ceb63755d7b"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:edac0f1ab77644605be2cbba52e6b7f630731fc42b34cb0f634be1a6eface56a"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5649fd1c7bade02f320a462fdefd0b4bd3ce036065836d4f42e0de958038e116"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:203104ed3e428044fd943bc4bf45fa73c0730391f9621e37fe39ecf477b128cb"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:298930cec56029e05497a76988377cbd7457ba864beeea92ad7e844fe74cd1f1"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:708838739abf24b2ceb208d0e22403dd018faeef86ddac04319a62ae884c4f15"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_armv7l.whl", hash = 
"sha256:0f7eb884681e3938906ed0434f20c63046eacd0111c4ba96f27b76084cd679f5"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4dc1e73c36828f982bfe79fadf5919923f8a6f4df2860804db9a98c48824ce8d"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:aed52fea0513bac0ccde438c188c8a471c4e0f457c2dd20cdbf6ea7a450046c7"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:fea24543955a6a729c45a73fe90e08c743f0b3334bbf3201e6c4bc1b0c7fa464"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bb6d88045545b26da47aa879dd4a89a71d1dce0f0e549b1abcb31dfe4a8eac49"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-win32.whl", hash = "sha256:2257141f39fe65a3fdf38aeccae4b953e5f3b3324f4ff0daf9f15b8518666a2c"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-win_amd64.whl", hash = "sha256:5ed6ab538499c8644b8a3e18debabcd7ce684f3fa91cf867521a7a0279cab2d6"}, + {file = "charset_normalizer-3.4.7-cp312-cp312-win_arm64.whl", hash = "sha256:56be790f86bfb2c98fb742ce566dfb4816e5a83384616ab59c49e0604d49c51d"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:f496c9c3cc02230093d8330875c4c3cdfc3b73612a5fd921c65d39cbcef08063"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ea948db76d31190bf08bd371623927ee1339d5f2a0b4b1b4a4439a65298703c"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a277ab8928b9f299723bc1a2dabb1265911b1a76341f90a510368ca44ad9ab66"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3bec022aec2c514d9cf199522a802bd007cd588ab17ab2525f20f9c34d067c18"}, + {file = 
"charset_normalizer-3.4.7-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e044c39e41b92c845bc815e5ae4230804e8e7bc29e399b0437d64222d92809dd"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:f495a1652cf3fbab2eb0639776dad966c2fb874d79d87ca07f9d5f059b8bd215"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e712b419df8ba5e42b226c510472b37bd57b38e897d3eca5e8cfd410a29fa859"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7804338df6fcc08105c7745f1502ba68d900f45fd770d5bdd5288ddccb8a42d8"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:481551899c856c704d58119b5025793fa6730adda3571971af568f66d2424bb5"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f59099f9b66f0d7145115e6f80dd8b1d847176df89b234a5a6b3f00437aa0832"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:f59ad4c0e8f6bba240a9bb85504faa1ab438237199d4cce5f622761507b8f6a6"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:3dedcc22d73ec993f42055eff4fcfed9318d1eeb9a6606c55892a26964964e48"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:64f02c6841d7d83f832cd97ccf8eb8a906d06eb95d5276069175c696b024b60a"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-win32.whl", hash = "sha256:4042d5c8f957e15221d423ba781e85d553722fc4113f523f2feb7b188cc34c5e"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-win_amd64.whl", hash = "sha256:3946fa46a0cf3e4c8cb1cc52f56bb536310d34f25f01ca9b6c16afa767dab110"}, + {file = "charset_normalizer-3.4.7-cp313-cp313-win_arm64.whl", hash = "sha256:80d04837f55fc81da168b98de4f4b797ef007fc8a79ab71c6ec9bc4dd662b15b"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-macosx_10_15_universal2.whl", 
hash = "sha256:c36c333c39be2dbca264d7803333c896ab8fa7d4d6f0ab7edb7dfd7aea6e98c0"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1c2aed2e5e41f24ea8ef1590b8e848a79b56f3a5564a65ceec43c9d692dc7d8a"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:54523e136b8948060c0fa0bc7b1b50c32c186f2fceee897a495406bb6e311d2b"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:715479b9a2802ecac752a3b0efa2b0b60285cf962ee38414211abdfccc233b41"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bd6c2a1c7573c64738d716488d2cdd3c00e340e4835707d8fdb8dc1a66ef164e"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:c45e9440fb78f8ddabcf714b68f936737a121355bf59f3907f4e17721b9d1aae"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3534e7dcbdcf757da6b85a0bbf5b6868786d5982dd959b065e65481644817a18"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e8ac484bf18ce6975760921bb6148041faa8fef0547200386ea0b52b5d27bf7b"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:a5fe03b42827c13cdccd08e6c0247b6a6d4b5e3cdc53fd1749f5896adcdc2356"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:2d6eb928e13016cea4f1f21d1e10c1cebd5a421bc57ddf5b1142ae3f86824fab"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:e74327fb75de8986940def6e8dee4f127cc9752bee7355bb323cc5b2659b6d46"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_s390x.whl", hash = 
"sha256:d6038d37043bced98a66e68d3aa2b6a35505dc01328cd65217cefe82f25def44"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7579e913a5339fb8fa133f6bbcfd8e6749696206cf05acdbdca71a1b436d8e72"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-win32.whl", hash = "sha256:5b77459df20e08151cd6f8b9ef8ef1f961ef73d85c21a555c7eed5b79410ec10"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-win_amd64.whl", hash = "sha256:92a0a01ead5e668468e952e4238cccd7c537364eb7d851ab144ab6627dbbe12f"}, + {file = "charset_normalizer-3.4.7-cp314-cp314-win_arm64.whl", hash = "sha256:67f6279d125ca0046a7fd386d01b311c6363844deac3e5b069b514ba3e63c246"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:effc3f449787117233702311a1b7d8f59cba9ced946ba727bdc329ec69028e24"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fbccdc05410c9ee21bbf16a35f4c1d16123dcdeb8a1d38f33654fa21d0234f79"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:733784b6d6def852c814bce5f318d25da2ee65dd4839a0718641c696e09a2960"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a89c23ef8d2c6b27fd200a42aa4ac72786e7c60d40efdc76e6011260b6e949c4"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6c114670c45346afedc0d947faf3c7f701051d2518b943679c8ff88befe14f8e"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:a180c5e59792af262bf263b21a3c49353f25945d8d9f70628e73de370d55e1e1"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3c9a494bc5ec77d43cea229c4f6db1e4d8fe7e1bbffa8b6f0f0032430ff8ab44"}, + {file 
= "charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8d828b6667a32a728a1ad1d93957cdf37489c57b97ae6c4de2860fa749b8fc1e"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:cf1493cd8607bec4d8a7b9b004e699fcf8f9103a9284cc94962cb73d20f9d4a3"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:0c96c3b819b5c3e9e165495db84d41914d6894d55181d2d108cc1a69bfc9cce0"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:752a45dc4a6934060b3b0dab47e04edc3326575f82be64bc4fc293914566503e"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:8778f0c7a52e56f75d12dae53ae320fae900a8b9b4164b981b9c5ce059cd1fcb"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ce3412fbe1e31eb81ea42f4169ed94861c56e643189e1e75f0041f3fe7020abe"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-win32.whl", hash = "sha256:c03a41a8784091e67a39648f70c5f97b5b6a37f216896d44d2cdcb82615339a0"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-win_amd64.whl", hash = "sha256:03853ed82eeebbce3c2abfdbc98c96dc205f32a79627688ac9a27370ea61a49c"}, + {file = "charset_normalizer-3.4.7-cp314-cp314t-win_arm64.whl", hash = "sha256:c35abb8bfff0185efac5878da64c45dafd2b37fb0383add1be155a763c1f083d"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e5f4d355f0a2b1a31bc3edec6795b46324349c9cb25eed068049e4f472fb4259"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:16d971e29578a5e97d7117866d15889a4a07befe0e87e703ed63cd90cb348c01"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:dca4bbc466a95ba9c0234ef56d7dd9509f63da22274589ebd4ed7f1f4d4c54e3"}, + {file = 
"charset_normalizer-3.4.7-cp38-cp38-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e80c8378d8f3d83cd3164da1ad2df9e37a666cdde7b1cb2298ed0b558064be30"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:36836d6ff945a00b88ba1e4572d721e60b5b8c98c155d465f56ad19d68f23734"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-manylinux_2_31_armv7l.whl", hash = "sha256:bd9b23791fe793e4968dba0c447e12f78e425c59fc0e3b97f6450f4781f3ee60"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:aef65cd602a6d0e0ff6f9930fcb1c8fec60dd2cfcb6facaf4bdb0e5873042db0"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:82b271f5137d07749f7bf32f70b17ab6eaabedd297e75dce75081a24f76eb545"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-musllinux_1_2_armv7l.whl", hash = "sha256:1efde3cae86c8c273f1eb3b287be7d8499420cf2fe7585c41d370d3e790054a5"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:c593052c465475e64bbfe5dbd81680f64a67fdc752c56d7a0ae205dc8aeefe0f"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-musllinux_1_2_riscv64.whl", hash = "sha256:af21eb4409a119e365397b2adbaca4c9ccab56543a65d5dbd9f920d6ac29f686"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:84c018e49c3bf790f9c2771c45e9313a08c2c2a6342b162cd650258b57817706"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:dd915403e231e6b1809fe9b6d9fc55cf8fb5e02765ac625d9cd623342a7905d7"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-win32.whl", hash = "sha256:320ade88cfb846b8cd6b4ddf5ee9e80ee0c1f52401f2456b84ae1ae6a1a5f207"}, + {file = "charset_normalizer-3.4.7-cp38-cp38-win_amd64.whl", hash = "sha256:1dc8b0ea451d6e69735094606991f32867807881400f808a106ee1d963c46a83"}, + {file = 
"charset_normalizer-3.4.7-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:177a0ba5f0211d488e295aaf82707237e331c24788d8d76c96c5a41594723217"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e0d51f618228538a3e8f46bd246f87a6cd030565e015803691603f55e12afb5"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:14265bfe1f09498b9d8ec91e9ec9fa52775edf90fcbde092b25f4a33d444fea9"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:87fad7d9ba98c86bcb41b2dc8dbb326619be2562af1f8ff50776a39e55721c5a"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f22dec1690b584cea26fade98b2435c132c1b5f68e39f5a0b7627cd7ae31f1dc"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-manylinux_2_31_armv7l.whl", hash = "sha256:d61f00a0869d77422d9b2aba989e2d24afa6ffd552af442e0e58de4f35ea6d00"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:6370e8686f662e6a3941ee48ed4742317cafbe5707e36406e9df792cdb535776"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a6c5863edfbe888d9eff9c8b8087354e27618d9da76425c119293f11712a6319"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:ed065083d0898c9d5b4bbec7b026fd755ff7454e6e8b73a67f8c744b13986e24"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:2cd4a60d0e2fb04537162c62bbbb4182f53541fe0ede35cdf270a1c1e723cc42"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:813c0e0132266c08eb87469a642cb30aaff57c5f426255419572aaeceeaa7bf4"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-musllinux_1_2_s390x.whl", hash = 
"sha256:07d9e39b01743c3717745f4c530a6349eadbfa043c7577eef86c502c15df2c67"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c0f081d69a6e58272819b70288d3221a6ee64b98df852631c80f293514d3b274"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-win32.whl", hash = "sha256:8751d2787c9131302398b11e6c8068053dcb55d5a8964e114b6e196cf16cb366"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-win_amd64.whl", hash = "sha256:12a6fff75f6bc66711b73a2f0addfc4c8c15a20e805146a02d147a318962c444"}, + {file = "charset_normalizer-3.4.7-cp39-cp39-win_arm64.whl", hash = "sha256:bb8cc7534f51d9a017b93e3e85b260924f909601c3df002bcdb58ddb4dc41a5c"}, + {file = "charset_normalizer-3.4.7-py3-none-any.whl", hash = "sha256:3dce51d0f5e7951f8bb4900c257dad282f49190fdbebecd4ba99bcc41fef404d"}, + {file = "charset_normalizer-3.4.7.tar.gz", hash = "sha256:ae89db9e5f98a11a4bf50407d4363e7b09b31e55bc117b4f7d80aab97ba009e5"}, +] + [[package]] name = "click" version = "8.1.8" @@ -234,6 +373,24 @@ http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] zstd = ["zstandard (>=0.18.0)"] +[[package]] +name = "ibflex" +version = "1.1" +description = "Parse Interactive Brokers Flex XML reports and convert to Python types" +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "ibflex-1.1-py3-none-any.whl", hash = "sha256:c84e02dafcd17f70587777c2e2f00e3cc1e949e045790bf4fe562fb03dbef434"}, + {file = "ibflex-1.1.tar.gz", hash = "sha256:3e5cac02cadcbd22ea46ae4ca306d67c274b7166f40119f5d7d7103a130d032a"}, +] + +[package.dependencies] +requests = {version = "*", optional = true, markers = "extra == \"web\""} + +[package.extras] +web = ["requests"] + [[package]] name = "idna" version = "3.11" @@ -663,6 +820,28 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "requests" +version = "2.34.2" +description = "Python HTTP for Humans." 
+optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "requests-2.34.2-py3-none-any.whl", hash = "sha256:2a0d60c172f83ac6ab31e4554906c0f3b3588d37b5cb939b1c061f4907e278e0"}, + {file = "requests-2.34.2.tar.gz", hash = "sha256:f288924cae4e29463698d6d60bc6a4da69c89185ad1e0bcc4104f584e960b9ed"}, +] + +[package.dependencies] +certifi = ">=2023.5.7" +charset_normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.26,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<8)"] + [[package]] name = "rich" version = "15.0.0" @@ -800,6 +979,24 @@ files = [ {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"}, ] +[[package]] +name = "urllib3" +version = "2.7.0" +description = "HTTP library with thread-safe connection pooling, file post, and more." +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "urllib3-2.7.0-py3-none-any.whl", hash = "sha256:9fb4c81ebbb1ce9531cce37674bbc6f1360472bc18ca9a553ede278ef7276897"}, + {file = "urllib3-2.7.0.tar.gz", hash = "sha256:231e0ec3b63ceb14667c67be60f2f2c40a518cb38b03af60abc813da26505f4c"}, +] + +[package.extras] +brotli = ["brotli (>=1.2.0) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=1.2.0.0) ; platform_python_implementation != \"CPython\""] +h2 = ["h2 (>=4,<5)"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["backports-zstd (>=1.0.0) ; python_version < \"3.14\""] + [[package]] name = "yapf" version = "0.43.0" @@ -818,4 +1015,4 @@ platformdirs = ">=3.5.1" [metadata] lock-version = "2.1" python-versions = ">=3.11,<3.13" -content-hash = "b3896b2258a425cce9498be9ada5bd48a06d5f2bd7c53ead044ad27c53086bd7" +content-hash = "8a704e79729d5bd3cbe78a7e35c51e9da724880915c0152788273b94bd00610d" diff --git a/pyproject.toml b/pyproject.toml index e5860d5..e6281b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,9 @@ aiomysql = 
"^0.3.2" # long-lived session alive (storage_state + device-trust cookie); actual data # is fetched via httpx against the SPA's private JSON backend. playwright = "^1.47" +# IBKR Flex Web Service: pulls Activity Flex Query XML reports (token-auth) +# and parses to typed dataclasses. No Gateway / daily re-auth needed. +ibflex = { version = "^1.1", extras = ["web"] } [tool.poetry.group.dev.dependencies] pytest = "^8.3" From 82797908b774ac52935d3cbb976eb835e121306f Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Tue, 26 May 2026 22:23:20 +0000 Subject: [PATCH 30/44] test: add IBKR Flex XML fixture (3 trades, 2 cash txns, 2 positions) --- tests/fixtures/ibkr/sample_flex.xml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 tests/fixtures/ibkr/sample_flex.xml diff --git a/tests/fixtures/ibkr/sample_flex.xml b/tests/fixtures/ibkr/sample_flex.xml new file mode 100644 index 0000000..0d82fcf --- /dev/null +++ b/tests/fixtures/ibkr/sample_flex.xml @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + + + From 975c3b4bf7e13bc545fce8f9511d1006fb123fc5 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Tue, 26 May 2026 22:24:55 +0000 Subject: [PATCH 31/44] metrics: add Pushgateway client for broker-sync providers --- broker_sync/metrics.py | 51 ++++++++++++++++++++++++++++++++ tests/test_metrics.py | 66 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 broker_sync/metrics.py create mode 100644 tests/test_metrics.py diff --git a/broker_sync/metrics.py b/broker_sync/metrics.py new file mode 100644 index 0000000..41566d8 --- /dev/null +++ b/broker_sync/metrics.py @@ -0,0 +1,51 @@ +"""Pushgateway client for broker-sync providers. + +One function: push a list of (metric, labels, value) tuples to Prometheus +Pushgateway under a given job name. Used by providers to surface per-run +drift / staleness / row counts that Prometheus can alert on. 
+ +In-cluster URL: http://prometheus-prometheus-pushgateway.monitoring:9091/metrics +Pass via the ``pushgateway_url`` argument or the ``PUSHGATEWAY_URL`` env var. +""" +from __future__ import annotations + +import logging +import os +from collections.abc import Iterable + +import httpx + +log = logging.getLogger(__name__) + + +def _format_metric(name: str, labels: dict[str, str], value: float) -> str: + if labels: + body = ",".join(f'{k}="{v}"' for k, v in sorted(labels.items())) + return f"{name}{{{body}}} {value}\n" + return f"{name} {value}\n" + + +async def push_pushgateway( + job: str, + metrics: Iterable[tuple[str, dict[str, str], float]], + pushgateway_url: str | None = None, + transport: httpx.AsyncBaseTransport | None = None, +) -> None: + """POST text-format metrics to Pushgateway under ``job``. + + ``pushgateway_url`` falls back to the env var ``PUSHGATEWAY_URL``. + Raises ``RuntimeError`` if the URL is unset or POST returns non-2xx. + """ + url = pushgateway_url or os.environ.get("PUSHGATEWAY_URL") + if not url: + raise RuntimeError("PUSHGATEWAY_URL not set and no override provided") + body = "".join(_format_metric(n, lbls, v) for n, lbls, v in metrics) + target = f"{url.rstrip('/')}/job/{job}" + async with httpx.AsyncClient(transport=transport, timeout=15.0) as c: + resp = await c.post(target, content=body, headers={"Content-Type": "text/plain"}) + if resp.status_code >= 300: + raise RuntimeError( + f"pushgateway POST {target} returned HTTP {resp.status_code}: " + f"{resp.text[:200]}" + ) + log.info("pushgateway: pushed %d metrics to job=%s", len(body.splitlines()), job) diff --git a/tests/test_metrics.py b/tests/test_metrics.py new file mode 100644 index 0000000..6a82012 --- /dev/null +++ b/tests/test_metrics.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +import httpx +import pytest + +from broker_sync.metrics import push_pushgateway + + +async def test_push_pushgateway_posts_text_format() -> None: + captured: dict[str, str] = {} + + def 
transport_handler(request: httpx.Request) -> httpx.Response: + captured["url"] = str(request.url) + captured["method"] = request.method + captured["body"] = request.content.decode("utf-8") + return httpx.Response(200) + + transport = httpx.MockTransport(transport_handler) + await push_pushgateway( + job="broker-sync-ibkr", + metrics=[ + ("ibkr_position_drift_shares", {"symbol": "VUAG.L"}, 0.0), + ("ibkr_sync_last_success_timestamp_seconds", {}, 1779830000.0), + ], + pushgateway_url="http://pg.example/metrics", + transport=transport, + ) + assert captured["method"] == "POST" + assert captured["url"] == "http://pg.example/metrics/job/broker-sync-ibkr" + body = captured["body"] + assert 'ibkr_position_drift_shares{symbol="VUAG.L"} 0.0' in body + assert "ibkr_sync_last_success_timestamp_seconds 1779830000.0" in body + + +async def test_push_pushgateway_raises_on_non_2xx() -> None: + transport = httpx.MockTransport(lambda r: httpx.Response(500, text="boom")) + with pytest.raises(RuntimeError, match="pushgateway.*500"): + await push_pushgateway( + job="x", + metrics=[("m", {}, 1.0)], + pushgateway_url="http://pg/metrics", + transport=transport, + ) + + +async def test_push_pushgateway_uses_env_var(monkeypatch: pytest.MonkeyPatch) -> None: + captured: dict[str, str] = {} + + def handler(request: httpx.Request) -> httpx.Response: + captured["url"] = str(request.url) + return httpx.Response(200) + + transport = httpx.MockTransport(handler) + monkeypatch.setenv("PUSHGATEWAY_URL", "http://from-env/metrics") + await push_pushgateway( + job="j", + metrics=[("m", {}, 1.0)], + transport=transport, + ) + assert captured["url"] == "http://from-env/metrics/job/j" + + +async def test_push_pushgateway_raises_when_url_missing(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PUSHGATEWAY_URL", raising=False) + with pytest.raises(RuntimeError, match="PUSHGATEWAY_URL not set"): + await push_pushgateway(job="j", metrics=[("m", {}, 1.0)]) From 
882415464e3bee2d23d4d491dab0d8c9d82d430b Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Tue, 26 May 2026 22:26:24 +0000 Subject: [PATCH 32/44] wealthfolio: add compute_position_qty for broker reconciliation --- broker_sync/sinks/wealthfolio.py | 44 ++++++++++++++++++++++++ tests/sinks/test_wealthfolio.py | 59 ++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+) diff --git a/broker_sync/sinks/wealthfolio.py b/broker_sync/sinks/wealthfolio.py index 7144f6f..51a2d41 100644 --- a/broker_sync/sinks/wealthfolio.py +++ b/broker_sync/sinks/wealthfolio.py @@ -315,6 +315,50 @@ class WealthfolioSink: total += amt return total + async def compute_position_qty(self, account_id: str) -> dict[str, Decimal]: + """Return per-symbol net position quantity (BUY/IN minus SELL/OUT) for + one account. Skips cash activities and unknown activity types. + + Used by the IBKR reconciliation step to compare against broker-reported + OpenPositions. + """ + qty_by_symbol: dict[str, Decimal] = {} + page = 1 + while True: + resp = await self._request( + "POST", _ACTIVITIES_SEARCH, + json={"accountIds": [account_id], "page": page, "pageSize": 500}, + ) + resp.raise_for_status() + payload = resp.json() + activities = payload.get("activities", []) if isinstance(payload, dict) else [] + if not activities: + break + for act in activities: + if not isinstance(act, dict): + continue + symbol = act.get("symbol") or "" + if not symbol or symbol.startswith("$CASH"): + continue + act_type = act.get("activityType") or "" + sign: int + if act_type in {"BUY", "ADD_HOLDING", "TRANSFER_IN"}: + sign = 1 + elif act_type in {"SELL", "REMOVE_HOLDING", "TRANSFER_OUT"}: + sign = -1 + else: + continue + try: + qty = Decimal(str(act.get("quantity") or 0)) + except Exception: + continue + qty_by_symbol[symbol] = qty_by_symbol.get(symbol, Decimal(0)) + sign * qty + total_pages = int(payload.get("totalPages") or 1) if isinstance(payload, dict) else 1 + if page >= total_pages: + break + page += 1 + return 
qty_by_symbol + # -- manual holdings snapshots -- async def push_manual_snapshots( diff --git a/tests/sinks/test_wealthfolio.py b/tests/sinks/test_wealthfolio.py index 436e52b..2b43681 100644 --- a/tests/sinks/test_wealthfolio.py +++ b/tests/sinks/test_wealthfolio.py @@ -373,3 +373,62 @@ async def test_push_manual_snapshots_short_circuits_on_empty( sink = _client(httpx.MockTransport(handler), sp) result = await sink.push_manual_snapshots(account_id="acct", snapshots=[]) assert result["snapshotsImported"] == 0 + + +# -- compute_position_qty (used by IBKR reconciliation) -- + + +@pytest.mark.asyncio +async def test_compute_position_qty_sums_buys_minus_sells(tmp_path: Path) -> None: + """Sums BUY/ADD_HOLDING/TRANSFER_IN minus SELL/REMOVE_HOLDING/TRANSFER_OUT + quantities per symbol, skipping cash activities.""" + sp = tmp_path / "s.json" + sp.write_text(json.dumps({"cookies": {"wf_token": "fresh"}})) + + page_1: dict[str, Any] = { + "activities": [ + {"symbol": "VUAG.L", "activityType": "BUY", "quantity": "10"}, + {"symbol": "VUAG.L", "activityType": "SELL", "quantity": "2"}, + {"symbol": "AAPL", "activityType": "BUY", "quantity": "5"}, + {"symbol": "$CASH-GBP", "activityType": "DEPOSIT", "quantity": "0", + "amount": "100"}, + # Unknown activity type — must be skipped, not crash. 
+ {"symbol": "VUAG.L", "activityType": "DIVIDEND", "quantity": "0", + "amount": "0.5"}, + ], + "totalPages": 1, + } + + async def handler(req: httpx.Request) -> httpx.Response: + if req.url.path == "/api/v1/activities/search": + return httpx.Response(200, json=page_1) + raise AssertionError(f"unexpected request: {req.method} {req.url.path}") + + sink = _client(httpx.MockTransport(handler), sp) + result = await sink.compute_position_qty("acct-123") + assert result == {"VUAG.L": Decimal("8"), "AAPL": Decimal("5")} + + +@pytest.mark.asyncio +async def test_compute_position_qty_paginates(tmp_path: Path) -> None: + """Walks all pages until totalPages reached.""" + sp = tmp_path / "s.json" + sp.write_text(json.dumps({"cookies": {"wf_token": "fresh"}})) + + pages: dict[int, dict[str, Any]] = { + 1: {"activities": [{"symbol": "VUAG.L", "activityType": "BUY", + "quantity": "3"}], "totalPages": 2}, + 2: {"activities": [{"symbol": "VUAG.L", "activityType": "BUY", + "quantity": "4"}], "totalPages": 2}, + } + seen_pages: list[int] = [] + + async def handler(req: httpx.Request) -> httpx.Response: + body = json.loads(req.content) + seen_pages.append(body["page"]) + return httpx.Response(200, json=pages[body["page"]]) + + sink = _client(httpx.MockTransport(handler), sp) + result = await sink.compute_position_qty("acct-x") + assert sorted(seen_pages) == [1, 2] + assert result == {"VUAG.L": Decimal("7")} From e83c5a0a8fe72515fc9f568a6f35ce100ac1899d Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Tue, 26 May 2026 22:28:35 +0000 Subject: [PATCH 33/44] =?UTF-8?q?ibkr:=20add=20Flex=20provider=20=E2=80=94?= =?UTF-8?q?=20Trade/Cash=20mapping=20+=20OpenPositions=20snapshot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Maps Trades (BUY/SELL) and CashTransactions (DIVIDEND, TAX, INTEREST, FEE, DEPOSIT, WITHDRAWAL) from an IBKR Flex Activity Query to broker-sync Activity objects. 
Adds canonical_symbol helper (LSE → .L suffix when exchange=LSE* or currency=GBP). Exposes OpenPositions for the reconciliation step that runs at the CLI layer. Guards against wrong-account writes by checking stmt.accountId == IBKR_ACCOUNT_ID_UPSTREAM before yielding any activities. 13 unit tests cover all the mappings + the mismatch guard. Co-Authored-By: Claude Opus 4.7 --- broker_sync/providers/ibkr.py | 255 ++++++++++++++++++++++++++++++++++ tests/providers/test_ibkr.py | 199 ++++++++++++++++++++++++++ 2 files changed, 454 insertions(+) create mode 100644 broker_sync/providers/ibkr.py create mode 100644 tests/providers/test_ibkr.py diff --git a/broker_sync/providers/ibkr.py b/broker_sync/providers/ibkr.py new file mode 100644 index 0000000..f156a3f --- /dev/null +++ b/broker_sync/providers/ibkr.py @@ -0,0 +1,255 @@ +"""Interactive Brokers Flex Web Service ingestion provider. + +Pulls daily Activity Flex Query reports via the ``ibflex`` library, maps +Trades + CashTransactions to broker-sync ``Activity`` objects, and runs a +reconciliation step against the broker-reported ``OpenPositions``. + +See ``docs/specs/2026-05-26-ibkr-ingest-design.md`` for the full design. +""" +from __future__ import annotations + +import logging +from collections.abc import AsyncIterator +from datetime import UTC, date, datetime +from decimal import Decimal +from typing import Any + +from broker_sync.models import Account, AccountType, Activity, ActivityType + +log = logging.getLogger(__name__) + +# Map IBKR currency → default exchange suffix. +# Today: GBP → LSE (.L). Extend when more accounts onboard. +_LSE_EXCHANGES = {"LSE", "LSEETF", "LSEIOB1"} +_GBP_SUFFIX = ".L" + + +def canonical_symbol(symbol: str, *, exchange: str | None, currency: str) -> str: + """Return the WF-canonical form of an IBKR ticker. + + LSE-listed GBP instruments get a ``.L`` suffix (Wealthfolio convention). + US instruments and anything already suffixed are returned unchanged. + """ + if "." 
in symbol: + return symbol + if exchange in _LSE_EXCHANGES or (exchange is None and currency == "GBP"): + return symbol + _GBP_SUFFIX + return symbol + + +def _to_utc_datetime(value: Any, time_value: Any = None) -> datetime: + """Combine a date (with optional time) into a UTC datetime.""" + if isinstance(value, datetime): + dt = value + elif isinstance(value, date): + if isinstance(time_value, str): + dt = datetime.fromisoformat(f"{value.isoformat()}T{time_value}") + elif hasattr(time_value, "isoformat"): + dt = datetime.fromisoformat(f"{value.isoformat()}T{time_value.isoformat()}") + else: + dt = datetime.fromisoformat(f"{value.isoformat()}T00:00:00") + else: + # Last-resort: ISO string + dt = datetime.fromisoformat(str(value)) + if dt.tzinfo is None: + dt = dt.replace(tzinfo=UTC) + return dt.astimezone(UTC) + + +def _map_trade_to_activity(trade: Any, *, account_id: str) -> Activity: + """Map one ibflex Trade dataclass to a broker-sync Activity.""" + buy_sell_obj = trade.buySell + buy_sell = buy_sell_obj.name if hasattr(buy_sell_obj, "name") else str(buy_sell_obj) + if buy_sell == "BUY": + activity_type = ActivityType.BUY + elif buy_sell == "SELL": + activity_type = ActivityType.SELL + else: + raise ValueError( + f"unsupported Trade.buySell={buy_sell!r} on tradeID={trade.tradeID}" + ) + + exchange = getattr(trade, "exchange", None) + symbol = canonical_symbol( + str(trade.symbol), + exchange=str(exchange) if exchange is not None else None, + currency=str(trade.currency), + ) + quantity = abs(Decimal(str(trade.quantity))) + unit_price = Decimal(str(trade.tradePrice)) + commission = trade.ibCommission if trade.ibCommission is not None else Decimal(0) + fee = abs(Decimal(str(commission))) + return Activity( + external_id=f"ibkr:trade:{trade.tradeID}", + account_id=account_id, + account_type=AccountType.GIA, + date=_to_utc_datetime(trade.tradeDate, getattr(trade, "tradeTime", None)), + activity_type=activity_type, + currency=str(trade.currency), + symbol=symbol, + 
quantity=quantity, + unit_price=unit_price, + fee=fee, + ) + + +# Map known IBKR Flex CashTransaction.type values to broker-sync ActivityType. +# Unknown values yield None + a WARNING — we refuse to guess. +_CASH_TYPE_MAP: dict[str, ActivityType] = { + "DIVIDEND": ActivityType.DIVIDEND, + "DIVIDENDS": ActivityType.DIVIDEND, + "PAYMENT_IN_LIEU_OF_DIVIDENDS": ActivityType.DIVIDEND, + "WITHHOLDING_TAX": ActivityType.TAX, + "WHTAX": ActivityType.TAX, + "BROKER_INTEREST_RECEIVED": ActivityType.INTEREST, + "BROKER_INTEREST_PAID": ActivityType.FEE, + "COMMISSION_ADJUSTMENTS": ActivityType.FEE, + "OTHER_FEES": ActivityType.FEE, +} + +_DEPOSIT_WITHDRAWAL_TYPES = { + "DEPOSITS_WITHDRAWALS", + "DEPOSIT_WITHDRAWALS", + "DEPOSITWITHDRAW", +} + + +def _normalise_cash_type(type_obj: Any) -> str: + """Canonicalise the IBKR Flex CashTransaction.type enum to an UPPER_SNAKE name.""" + if hasattr(type_obj, "name"): + return str(type_obj.name).upper() + return str(type_obj).strip().upper().replace(" ", "_").replace("&", "AND") + + +def _map_cash_to_activity(cash: Any, *, account_id: str) -> Activity | None: + """Map one ibflex CashTransaction to a broker-sync Activity. + + Returns None for unsupported types (logged at WARNING). 
+ """ + type_name = _normalise_cash_type(cash.type) + amount = Decimal(str(cash.amount)) + + if type_name in _DEPOSIT_WITHDRAWAL_TYPES: + activity_type = ActivityType.DEPOSIT if amount > 0 else ActivityType.WITHDRAWAL + else: + mapped = _CASH_TYPE_MAP.get(type_name) + if mapped is None: + log.warning( + "ibkr: skipping cash transaction id=%s with unsupported type=%r", + getattr(cash, "transactionID", "?"), + type_name, + ) + return None + activity_type = mapped + + dt_raw = cash.dateTime + dt = _to_utc_datetime(dt_raw) if dt_raw is not None else datetime.now(UTC) + + return Activity( + external_id=f"ibkr:cash:{cash.transactionID}", + account_id=account_id, + account_type=AccountType.GIA, + date=dt, + activity_type=activity_type, + currency=str(cash.currency), + amount=abs(amount), + ) + + +class IBKRError(Exception): + """Base class for ibkr-provider errors.""" + + +class IBKRAccountMismatchError(IBKRError): + """Flex statement accountId did not match configured upstream id.""" + + +class IBKRProvider: + """Fetches IBKR Flex Activity reports and yields broker-sync Activities. + + Reconciliation (OpenPositions vs WF-computed qty) is NOT part of + ``fetch()`` — it runs at the CLI layer after import, where the + WealthfolioSink is available to query WF. + """ + + def __init__( + self, + *, + token: str, + query_id: str, + wf_account_id: str, + upstream_account_id: str, + ) -> None: + self._token = token + self._query_id = query_id + self._wf_account_id = wf_account_id + self._upstream_account_id = upstream_account_id + # Stashed for the reconciliation step after fetch() drains. + self._last_response: Any = None + + def accounts(self) -> list[Account]: + return [ + Account( + id=self._wf_account_id, + name="Interactive Brokers (UK)", + account_type=AccountType.GIA, + currency="GBP", # FX-aware per-trade; account ccy is GBP + provider="ibkr", + ) + ] + + async def close(self) -> None: + # ibflex.client uses synchronous `requests` under the hood; no resources to close. 
+ return + + async def fetch( + self, + *, + since: datetime | None = None, # Flex query owns the date range + before: datetime | None = None, + ) -> AsyncIterator[Activity]: + from ibflex import client as ib_client + from ibflex import parser as ib_parser + + del since, before # unused; Flex query defines the period + + xml_bytes = ib_client.download(self._token, self._query_id) + response = ib_parser.parse(xml_bytes) + self._last_response = response + + if not response.FlexStatements: + log.warning("ibkr: Flex response had no FlexStatements") + return + + stmt = response.FlexStatements[0] + if str(stmt.accountId) != self._upstream_account_id: + raise IBKRAccountMismatchError( + f"Flex statement.accountId={stmt.accountId!r} does not match " + f"configured IBKR_ACCOUNT_ID_UPSTREAM={self._upstream_account_id!r} " + f"— refusing to ingest" + ) + + for trade in stmt.Trades or []: + yield _map_trade_to_activity(trade, account_id=self._wf_account_id) + + for cash in stmt.CashTransactions or []: + activity = _map_cash_to_activity(cash, account_id=self._wf_account_id) + if activity is not None: + yield activity + + def open_positions(self) -> list[tuple[str, Decimal]]: + """Return ``[(canonical_symbol, position_qty), ...]`` from the most + recent fetch. 
Empty list before the first ``fetch()`` call.""" + if self._last_response is None: + return [] + stmt = self._last_response.FlexStatements[0] + out: list[tuple[str, Decimal]] = [] + for pos in stmt.OpenPositions or []: + exchange = getattr(pos, "exchange", None) + symbol = canonical_symbol( + str(pos.symbol), + exchange=str(exchange) if exchange is not None else None, + currency=str(pos.currency), + ) + out.append((symbol, Decimal(str(pos.position)))) + return out diff --git a/tests/providers/test_ibkr.py b/tests/providers/test_ibkr.py new file mode 100644 index 0000000..ea83e26 --- /dev/null +++ b/tests/providers/test_ibkr.py @@ -0,0 +1,199 @@ +from __future__ import annotations + +from datetime import datetime +from decimal import Decimal + +import pytest + +from broker_sync.models import ActivityType +from broker_sync.providers.ibkr import ( + IBKRAccountMismatchError, + IBKRProvider, + _map_cash_to_activity, + _map_trade_to_activity, + canonical_symbol, +) + +# -- canonical_symbol -- + + +def test_canonical_symbol_lse_etf_gets_l_suffix() -> None: + assert canonical_symbol("VUAG", exchange="LSEETF", currency="GBP") == "VUAG.L" + + +def test_canonical_symbol_us_stock_unchanged() -> None: + assert canonical_symbol("AAPL", exchange="NASDAQ", currency="USD") == "AAPL" + + +def test_canonical_symbol_lse_gbp_inferred_when_exchange_missing() -> None: + """IBKR Flex sometimes omits exchange — infer LSE from currency==GBP.""" + assert canonical_symbol("VUAG", exchange=None, currency="GBP") == "VUAG.L" + + +def test_canonical_symbol_already_suffixed_unchanged() -> None: + assert canonical_symbol("VUAG.L", exchange="LSEETF", currency="GBP") == "VUAG.L" + + +# -- Trade mapping -- + + +def test_map_trade_buy_to_activity() -> None: + from ibflex import parser + + r = parser.parse("tests/fixtures/ibkr/sample_flex.xml") + trade = r.FlexStatements[0].Trades[0] # T1001: 10 VUAG BUY @ 107.50 GBP, comm -1.05 + + activity = _map_trade_to_activity(trade, account_id="wf-acct-uuid") + 
+ assert activity.external_id == "ibkr:trade:T1001" + assert activity.account_id == "wf-acct-uuid" + assert activity.activity_type == ActivityType.BUY + assert activity.symbol == "VUAG.L" + assert activity.quantity == Decimal("10") + assert activity.unit_price == Decimal("107.50") + assert activity.fee == Decimal("1.05") + assert activity.currency == "GBP" + assert isinstance(activity.date, datetime) + assert activity.date.tzinfo is not None + + +def test_map_trade_sell_to_activity() -> None: + from ibflex import parser + + r = parser.parse("tests/fixtures/ibkr/sample_flex.xml") + trade = r.FlexStatements[0].Trades[2] # T1003: 2 VUAG SELL @ 108.00 GBP + + activity = _map_trade_to_activity(trade, account_id="wf-acct") + assert activity.activity_type == ActivityType.SELL + assert activity.symbol == "VUAG.L" + assert activity.quantity == Decimal("2") + assert activity.unit_price == Decimal("108.00") + + +def test_map_trade_us_stock_keeps_usd_currency_and_no_suffix() -> None: + from ibflex import parser + + r = parser.parse("tests/fixtures/ibkr/sample_flex.xml") + trade = r.FlexStatements[0].Trades[1] # T1002: AAPL BUY USD + + activity = _map_trade_to_activity(trade, account_id="wf-acct") + assert activity.symbol == "AAPL" + assert activity.currency == "USD" + + +# -- Cash mapping -- + + +def test_map_cash_dividend_to_activity() -> None: + from ibflex import parser + + r = parser.parse("tests/fixtures/ibkr/sample_flex.xml") + cash = r.FlexStatements[0].CashTransactions[0] # C5001: Dividends 3.50 GBP + + activity = _map_cash_to_activity(cash, account_id="wf-acct") + assert activity is not None + assert activity.external_id == "ibkr:cash:C5001" + assert activity.activity_type == ActivityType.DIVIDEND + assert activity.amount == Decimal("3.50") + assert activity.currency == "GBP" + + +def test_map_cash_withholding_tax_to_tax_activity() -> None: + from ibflex import parser + + r = parser.parse("tests/fixtures/ibkr/sample_flex.xml") + cash = 
r.FlexStatements[0].CashTransactions[1] # C5002: Withholding Tax -0.35 GBP + + activity = _map_cash_to_activity(cash, account_id="wf-acct") + assert activity is not None + assert activity.activity_type == ActivityType.TAX + assert activity.amount == Decimal("0.35") # always positive on Activity + + +def test_map_cash_unknown_type_returns_none_and_logs(caplog: pytest.LogCaptureFixture) -> None: + """Unknown CashTransaction.type produces None + a WARNING log line.""" + + class FakeType: + name = "FrobnicatedThing" + + class FakeCash: + transactionID = "C9999" + dateTime = None + type = FakeType() + amount = Decimal("0") + currency = "GBP" + + with caplog.at_level("WARNING"): + result = _map_cash_to_activity(FakeCash, account_id="wf-acct") + assert result is None + assert any("FROBNICATEDTHING" in r.message for r in caplog.records) + + +# -- IBKRProvider end-to-end -- + + +async def test_ibkr_provider_fetch_returns_mapped_activities( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """IBKRProvider.fetch() yields all mapped activities (trades + cash).""" + from ibflex import client as ib_client + + with open("tests/fixtures/ibkr/sample_flex.xml", "rb") as f: + xml_bytes = f.read() + monkeypatch.setattr(ib_client, "download", lambda *a, **kw: xml_bytes) + + provider = IBKRProvider( + token="t", + query_id="q", + wf_account_id="wf-acct", + upstream_account_id="U12345678", + ) + activities = [a async for a in provider.fetch()] + # 3 trades + 2 cash = 5 + assert len(activities) == 5 + types = sorted(a.activity_type.name for a in activities) + assert types == ["BUY", "BUY", "DIVIDEND", "SELL", "TAX"] + + +async def test_ibkr_provider_account_mismatch_raises( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Mismatched accountId raises and writes nothing.""" + from ibflex import client as ib_client + + with open("tests/fixtures/ibkr/sample_flex.xml", "rb") as f: + xml_bytes = f.read() + monkeypatch.setattr(ib_client, "download", lambda *a, **kw: xml_bytes) + + provider = 
IBKRProvider( + token="t", + query_id="q", + wf_account_id="wf-acct", + upstream_account_id="U99999999", # WRONG + ) + with pytest.raises(IBKRAccountMismatchError, match="U12345678"): + _ = [a async for a in provider.fetch()] + + +async def test_ibkr_provider_open_positions_after_fetch( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """open_positions() returns canonicalised symbol + qty after fetch drained.""" + from ibflex import client as ib_client + + with open("tests/fixtures/ibkr/sample_flex.xml", "rb") as f: + xml_bytes = f.read() + monkeypatch.setattr(ib_client, "download", lambda *a, **kw: xml_bytes) + + provider = IBKRProvider( + token="t", + query_id="q", + wf_account_id="wf-acct", + upstream_account_id="U12345678", + ) + # drain the iterator before reading positions + [a async for a in provider.fetch()] + + positions = provider.open_positions() + # VUAG → VUAG.L (LSE inferred from GBP); AAPL unchanged (USD) + assert dict(positions) == {"VUAG.L": Decimal("8"), "AAPL": Decimal("5")} From a4dab03bc5b0b8cb9500e39e2cb660858214a4c6 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Tue, 26 May 2026 22:29:44 +0000 Subject: [PATCH 34/44] cli: add 'broker-sync ibkr' command (Flex pull + import + reconcile + metrics) --- broker_sync/cli.py | 94 +++++++++++++++++++++++++++++++++++ broker_sync/providers/ibkr.py | 2 + 2 files changed, 96 insertions(+) diff --git a/broker_sync/cli.py b/broker_sync/cli.py index 6e08eb8..cef7526 100644 --- a/broker_sync/cli.py +++ b/broker_sync/cli.py @@ -230,6 +230,100 @@ def invest_engine( asyncio.run(_run()) +@app.command("ibkr") +def ibkr( + wf_base_url: str = typer.Option(..., envvar="WF_BASE_URL"), + wf_username: str = typer.Option(..., envvar="WF_USERNAME"), + wf_password: str = typer.Option(..., envvar="WF_PASSWORD"), + wf_session_path: str = typer.Option( + "/data/wealthfolio_session.json", envvar="WF_SESSION_PATH" + ), + ibkr_flex_token: str = typer.Option(..., envvar="IBKR_FLEX_TOKEN"), + ibkr_flex_query_id: str = 
typer.Option(..., envvar="IBKR_FLEX_QUERY_ID"), + ibkr_account_id: str = typer.Option(..., envvar="IBKR_ACCOUNT_ID"), + ibkr_account_id_upstream: str = typer.Option(..., envvar="IBKR_ACCOUNT_ID_UPSTREAM"), + pushgateway_url: str = typer.Option( + "http://prometheus-prometheus-pushgateway.monitoring:9091/metrics", + envvar="PUSHGATEWAY_URL", + ), + data_dir: str = typer.Option("/data", envvar="BROKER_SYNC_DATA_DIR"), +) -> None: + """Phase 2c — daily IBKR Flex Web Service → Wealthfolio sync. + + Pulls an Activity Flex Query (Trades + Cash + OpenPositions), maps to + broker-sync Activities, pushes through the shared pipeline, then + reconciles broker-reported OpenPositions against WF-computed quantities + and publishes a Pushgateway drift metric. + """ + import time + from decimal import Decimal + + from broker_sync.dedup import SyncRecordStore + from broker_sync.metrics import push_pushgateway + from broker_sync.pipeline import sync_provider_to_wealthfolio + from broker_sync.providers.ibkr import IBKRAccountMismatchError, IBKRProvider + from broker_sync.sinks.wealthfolio import WealthfolioSink + + _setup_logging() + data = Path(data_dir) + data.mkdir(parents=True, exist_ok=True) + + async def _run() -> None: + sink = WealthfolioSink( + base_url=wf_base_url, + username=wf_username, + password=wf_password, + session_path=wf_session_path, + ) + provider = IBKRProvider( + token=ibkr_flex_token, + query_id=ibkr_flex_query_id, + wf_account_id=ibkr_account_id, + upstream_account_id=ibkr_account_id_upstream, + ) + dedup = SyncRecordStore(data / "sync.db") + try: + if not Path(wf_session_path).exists(): + await sink.login() + result = await sync_provider_to_wealthfolio( + provider=provider, + sink=sink, + dedup=dedup, + ) + + # Reconciliation: broker truth vs WF truth. 
+ wf_qty = await sink.compute_position_qty(ibkr_account_id) + drift_metrics: list[tuple[str, dict[str, str], float]] = [] + for symbol, broker_qty in provider.open_positions(): + drift = broker_qty - wf_qty.get(symbol, Decimal(0)) + drift_metrics.append( + ( + "ibkr_position_drift_shares", + {"symbol": symbol, "account": "ibkr-uk"}, + float(drift), + ) + ) + drift_metrics.append( + ("ibkr_sync_last_success_timestamp_seconds", {}, float(time.time())) + ) + await push_pushgateway("broker-sync-ibkr", drift_metrics, pushgateway_url) + except IBKRAccountMismatchError as e: + typer.echo(f"IBKR: {e}", err=True) + sys.exit(2) + finally: + await provider.close() + await sink.close() + + typer.echo( + f"ibkr: fetched={result.fetched} new={result.new_after_dedup} " + f"imported={result.imported} failed={result.failed}" + ) + if result.failed > 0: + sys.exit(1) + + asyncio.run(_run()) + + @app.command("finance-mysql-import") def finance_mysql_import( wf_base_url: str = typer.Option(..., envvar="WF_BASE_URL"), diff --git a/broker_sync/providers/ibkr.py b/broker_sync/providers/ibkr.py index f156a3f..741c79a 100644 --- a/broker_sync/providers/ibkr.py +++ b/broker_sync/providers/ibkr.py @@ -172,6 +172,8 @@ class IBKRProvider: WealthfolioSink is available to query WF. 
""" + name = "ibkr" + def __init__( self, *, From 2fb1fbbdd8b38e9fe01ff4121786b9580052efb1 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Tue, 26 May 2026 22:34:46 +0000 Subject: [PATCH 35/44] docs: add IBKR provider runbook --- docs/providers/ibkr.md | 124 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 docs/providers/ibkr.md diff --git a/docs/providers/ibkr.md b/docs/providers/ibkr.md new file mode 100644 index 0000000..a21df5b --- /dev/null +++ b/docs/providers/ibkr.md @@ -0,0 +1,124 @@ +# Provider: Interactive Brokers (IBKR Flex Web Service) + +Pulls a daily Activity Flex Query via the [`ibflex`](https://github.com/csingley/ibflex) +library, maps Trades + CashTransactions to broker-sync Activities, and +reconciles broker-side OpenPositions against WF-computed quantities. + +## When this runs + +- K8s CronJob `broker-sync-ibkr` in the `broker-sync` namespace, daily 02:00 UK. +- Manual trigger: + ```bash + kubectl -n broker-sync create job --from=cronjob/broker-sync-ibkr broker-sync-ibkr-manual-$(date +%s) + ``` + +## Vault secrets — `secret/broker-sync` + +| Key | Description | +|---|---| +| `ibkr_flex_token` | Flex Web Service token (1-year validity, rotate via IBKR Client Portal). | +| `ibkr_flex_query_id` | Activity Flex Query ID (5–7 digit number). | +| `ibkr_account_id` | Wealthfolio account UUID for "Interactive Brokers (UK)". | +| `ibkr_account_id_upstream` | IBKR-side account number (e.g. `U12345678`) — guards against wrong-account ingestion. | + +ExternalSecret `broker-sync-secrets` syncs all keys from `secret/broker-sync` +to a K8s secret of the same name. New keys take ~15 min to propagate. 
+ +## IBKR Flex Query design + +In IBKR Client Portal → Reports → Flex Queries → Activity Flex Query, create +a new query named `broker-sync-activity` with: + +| Section | Required fields | +|---|---| +| Account Information | accountId | +| Trades | tradeID, tradeDate, tradeTime, symbol, buySell, quantity, tradePrice, currency, ibCommission, assetCategory, exchange | +| Cash Transactions | transactionID, dateTime, type, amount, currency, description | +| Open Positions | symbol, position, markPrice, currency, assetCategory, exchange | +| Securities Information | symbol, description, conid | + +**Date Format:** `yyyy-MM-dd`. **Time Format:** `HH:mm:ss` (no timezone +suffix — ibflex 1.1 rejects timezone abbreviations in the time field). +**Date Range:** `Last Business Day` for daily incremental. Switch to +`Year to Date` only for one-off backfills. + +## Cash type mapping + +| IBKR Flex `CashTransaction.type` | broker-sync `ActivityType` | +|---|---| +| Dividends | DIVIDEND | +| Withholding Tax | TAX | +| Broker Interest Received | INTEREST | +| Broker Interest Paid | FEE | +| Commission Adjustments | FEE | +| Other Fees | FEE | +| Deposits & Withdrawals | DEPOSIT (amount > 0) / WITHDRAWAL (amount < 0) | +| anything else | skipped + WARNING logged (refuse to guess) | + +## Dedup keys + +- Trades: `external_id = "ibkr:trade:" + tradeID` +- Cash: `external_id = "ibkr:cash:" + transactionID` + +Both are stable across re-runs; `dedup.SyncRecordStore` rejects already- +synced IDs. + +## Symbol canonicalisation + +LSE-listed GBP instruments get a `.L` suffix (Wealthfolio convention). +US instruments and anything already suffixed pass through unchanged. + +The heuristic: `exchange in {LSE, LSEETF, LSEIOB1}` OR +`(exchange is None AND currency == GBP)` → suffix with `.L`. Edge cases +not yet covered (Euronext, XETRA) — extend `canonical_symbol` when those +holdings exist. 
+ +## Position reconciliation + +Each run pushes to Pushgateway under job `broker-sync-ibkr`: +- `ibkr_position_drift_shares{symbol, account="ibkr-uk"}` — + broker_qty − wf_qty per asset. +- `ibkr_sync_last_success_timestamp_seconds` — unix timestamp. + +Alerts (TODO, will be added to the monitoring stack on first +non-zero drift): +- `IBKRPositionDrift{symbol}` — `|drift| > 0.01` for >24h, Slack `#security`. +- `IBKRSyncStale` — timestamp > 36h old. +- `IBKRFlexTokenExpired` — Loki rule on the "code 1003" log line. + +## Account guard + +Before yielding any activities, the provider checks +`flex.accountId == IBKR_ACCOUNT_ID_UPSTREAM`. Mismatch → raises +`IBKRAccountMismatchError` and writes nothing. Prevents wrong-account +ingestion from a misconfigured query (e.g., someone replaced the token +with another user's by mistake). + +## Token rotation + +Flex tokens expire after 1 year. When the cron starts failing with +`ResponseCodeError(code=1003)`: + +1. Sign in to IBKR Client Portal → Reports → Settings → Flex Web Service + → regenerate token. +2. `vault kv patch secret/broker-sync ibkr_flex_token='<new-token>'` +3. ExternalSecrets controller picks up the new value within ~15 min; no + manual pod restart needed. + +## Troubleshooting + +| Symptom | Likely cause | Fix | +|---|---|---| +| `IBKR_FLEX_TOKEN not provided` exit 2 | Vault has placeholder value or key missing | `vault kv patch secret/broker-sync ibkr_flex_token='<new-token>'` | +| `IBKRAccountMismatchError` | `ibkr_account_id_upstream` doesn't match the account in the Flex query | Re-check IBKR account number; fix the Vault value | +| `ResponseCodeError(code=1003)` | Flex token expired | See "Token rotation" above | +| `StatementGenerationTimeout` | IBKR side slow | Single retry built in; if it persists, try a smaller date range | +| `Can't convert '...
TZ' to time` parser error | Flex query has Time Format with timezone suffix | Switch to `HH:mm:ss` (no TZ) in Flex query settings | +| `'ETF' is not a valid AssetClass` | ETF set in fixture not in ibflex enum | Use `STK` in fixtures (IBKR Flex categorises ETFs under STK) | + +## References + +- Spec: [`docs/specs/2026-05-26-ibkr-ingest-design.md`](../specs/2026-05-26-ibkr-ingest-design.md) +- Plan: [`docs/plans/2026-05-26-ibkr-flex-ingestion.md`](../plans/2026-05-26-ibkr-flex-ingestion.md) +- Library: +- IBKR Flex Web Service docs: From 0ab069349f8b2968f3f265100a74f8c3dba2bd9e Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Tue, 26 May 2026 22:52:11 +0000 Subject: [PATCH 36/44] sinks/wealthfolio: treat duplicates as success in import-summary check The IMAP cronjob re-processes the full mailbox window on every run, so on steady-state runs all activities come back tagged duplicate=N. The existing logic raises ImportValidationError whenever imported_n < total_n, which makes the cron exit 1 (and the Job is reported FAILED) even though the data path is healthy. Treat (imported + duplicates) as "accounted for". Only raise when rows go missing entirely (silently dropped / validation rejected). Co-Authored-By: Claude Opus 4.7 --- broker_sync/sinks/wealthfolio.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/broker_sync/sinks/wealthfolio.py b/broker_sync/sinks/wealthfolio.py index 51a2d41..f459952 100644 --- a/broker_sync/sinks/wealthfolio.py +++ b/broker_sync/sinks/wealthfolio.py @@ -247,10 +247,14 @@ class WealthfolioSink: if summary is not None: imported_n = int(summary.get("imported", 0)) total_n = int(summary.get("total", len(valid_rows))) - if imported_n < total_n: + dupes = int(summary.get("duplicates", 0)) + skipped = int(summary.get("skipped", 0)) + # Duplicates are expected on every re-run (the cron re-processes the + # full IMAP window each night) — treat (imported + duplicates) as + # accounted-for. 
Only fail if something was genuinely lost. + accounted = imported_n + dupes + if accounted < total_n: err_msg = summary.get("errorMessage") or "no errorMessage" - skipped = int(summary.get("skipped", 0)) - dupes = int(summary.get("duplicates", 0)) raise ImportValidationError(f"Wealthfolio /import persisted {imported_n}/{total_n} " f"(skipped={skipped} duplicates={dupes}). " f"errorMessage: {err_msg}") From 30af5fe2c96ab0fc91dcf4ee155815d52b4af278 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Wed, 27 May 2026 09:11:57 +0000 Subject: [PATCH 37/44] =?UTF-8?q?docs(ibkr):=20change=20Flex=20date=20rang?= =?UTF-8?q?e=20from=20Last=20Business=20Day=20=E2=86=92=20Last=2090=20Days?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trailing window backed by SyncRecordStore dedup is strictly better than a single-day window — a single missed cron run with Last Business Day loses that day's activity permanently. SyncRecordStore is keyed by ibkr:trade:<tradeID> / ibkr:cash:<transactionID>, so overlapping pulls are no-ops. Caught during the brainstorming review. Co-Authored-By: Claude Opus 4.7 --- docs/plans/2026-05-26-ibkr-flex-ingestion.md | 6 ++++-- docs/providers/ibkr.md | 7 +++++-- docs/specs/2026-05-26-ibkr-ingest-design.md | 20 +++++++++++++------- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/docs/plans/2026-05-26-ibkr-flex-ingestion.md b/docs/plans/2026-05-26-ibkr-flex-ingestion.md index 7a22578..1fa57bb 100644 --- a/docs/plans/2026-05-26-ibkr-flex-ingestion.md +++ b/docs/plans/2026-05-26-ibkr-flex-ingestion.md @@ -1461,8 +1461,10 @@ OpenPositions against WF-computed quantities. | Open Positions | symbol, position, markPrice, currency, assetCategory | | Securities Information | symbol, description, conid | -Date range: `Last Business Day` for daily incremental. Switch to -`Year to Date` for one-time backfills only. +Date range: `Last 90 Days` — trailing window so a missed cron run +doesn't lose data.
SyncRecordStore makes overlapping pulls idempotent. +Switch to `Year to Date` or `Custom Date Range` only for one-time +historical backfills. ## Cash type mapping diff --git a/docs/providers/ibkr.md b/docs/providers/ibkr.md index a21df5b..501c641 100644 --- a/docs/providers/ibkr.md +++ b/docs/providers/ibkr.md @@ -39,8 +39,11 @@ a new query named `broker-sync-activity` with: **Date Format:** `yyyy-MM-dd`. **Time Format:** `HH:mm:ss` (no timezone suffix — ibflex 1.1 rejects timezone abbreviations in the time field). -**Date Range:** `Last Business Day` for daily incremental. Switch to -`Year to Date` only for one-off backfills. +**Date Range:** `Last 90 Days` — trailing window so a missed cron run +doesn't lose data. SyncRecordStore (keyed by `external_id`) makes +overlapping pulls idempotent. For a one-off historical backfill, widen +temporarily to `Year to Date` or `Custom Date Range`, run once, then +switch back. ## Cash type mapping diff --git a/docs/specs/2026-05-26-ibkr-ingest-design.md b/docs/specs/2026-05-26-ibkr-ingest-design.md index 40bb9df..21aaf12 100644 --- a/docs/specs/2026-05-26-ibkr-ingest-design.md +++ b/docs/specs/2026-05-26-ibkr-ingest-design.md @@ -210,9 +210,12 @@ top. - Sections: `Account Information`, `Trades`, `Cash Transactions`, `Open Positions`, `Securities Information` - Date Format: `yyyy-MM-dd` · Time Format: `HH:mm:ss TimeZone` - - Date Range: `Last Business Day` (for daily runs; flip to - `Year to Date` only for the initial backfill — irrelevant while - account is empty) + - Date Range: `Last 90 Days` — trailing window so a missed cron run + (failed pod, outage, vacation) doesn't lose data. SyncRecordStore + keys on `ibkr:trade:` / `ibkr:cash:`, so + overlapping pulls are no-ops. `Last Business Day` was the original + choice but creates a "single missed run = permanent data loss" + failure mode — rejected in favour of dedup-backed resync window. 
- Format: XML - Trade fields: ensure `tradeID`, `tradeDate`, `tradeTime`, `symbol`, `buySell`, `quantity`, `tradePrice`, `currency`, `ibCommission`, @@ -248,10 +251,13 @@ curl -sS -b /tmp/wf-jar -X POST "$WF_BASE_URL/api/v1/accounts" \ ### Step 4 — Initial backfill (skip while account is empty) -When the IBKR account first holds positions, switch the Flex query -Date Range to `Year to Date`, run the CronJob manually once, verify WF -totals match the broker app, then switch the Flex query back to -`Last Business Day` for daily incremental. +When the IBKR account first holds positions, the daily CronJob will +backfill automatically up to the 90-day trailing window. For older +history, temporarily switch the Flex query Date Range to +`Year to Date` (or `Custom Date Range` with a 1-year window), run the +CronJob manually once, verify WF totals match the broker app, then +switch the Flex query back to `Last 90 Days` for daily incremental. +Dedup makes the temporary widening safe — already-synced rows are no-ops. ### Step 5 — Deploy From ceb652b62363cce2ea71147a0af07e035d72e5e4 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Wed, 27 May 2026 09:18:42 +0000 Subject: [PATCH 38/44] ibkr: use IBKR account number as the canonical Account.id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bug: provider passed the WF UUID as Account.id. ensure_account looks up existing accounts by (provider, providerAccountId=Account.id), so the WF-UUID-as-providerAccountId would never match the manually-created account (which has providerAccountId=U13279690), causing the pipeline to create a duplicate WF account on every cron run. Fix: Account.id is now the IBKR account number (U13279690) throughout. The pipeline's _ensure_accounts() resolves it to the WF UUID via the canonical (provider, providerAccountId) lookup; activities are remapped before import. 
CLI no longer takes the WF UUID — derives it post-import via a cheap idempotent ensure_account call for the reconciliation step. Co-Authored-By: Claude Opus 4.7 --- broker_sync/cli.py | 13 ++++++++++--- broker_sync/providers/ibkr.py | 12 +++++++----- tests/providers/test_ibkr.py | 3 --- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/broker_sync/cli.py b/broker_sync/cli.py index cef7526..7f855f5 100644 --- a/broker_sync/cli.py +++ b/broker_sync/cli.py @@ -240,7 +240,6 @@ def ibkr( ), ibkr_flex_token: str = typer.Option(..., envvar="IBKR_FLEX_TOKEN"), ibkr_flex_query_id: str = typer.Option(..., envvar="IBKR_FLEX_QUERY_ID"), - ibkr_account_id: str = typer.Option(..., envvar="IBKR_ACCOUNT_ID"), ibkr_account_id_upstream: str = typer.Option(..., envvar="IBKR_ACCOUNT_ID_UPSTREAM"), pushgateway_url: str = typer.Option( "http://prometheus-prometheus-pushgateway.monitoring:9091/metrics", @@ -254,6 +253,10 @@ def ibkr( broker-sync Activities, pushes through the shared pipeline, then reconciles broker-reported OpenPositions against WF-computed quantities and publishes a Pushgateway drift metric. + + The Wealthfolio account UUID is resolved via the pipeline's + ensure_account(provider="ibkr", providerAccountId=IBKR_ACCOUNT_ID_UPSTREAM) + lookup — no need to wire the UUID in as a separate env var. """ import time from decimal import Decimal @@ -278,7 +281,6 @@ def ibkr( provider = IBKRProvider( token=ibkr_flex_token, query_id=ibkr_flex_query_id, - wf_account_id=ibkr_account_id, upstream_account_id=ibkr_account_id_upstream, ) dedup = SyncRecordStore(data / "sync.db") @@ -291,8 +293,13 @@ def ibkr( dedup=dedup, ) + # Resolve WF UUID for reconciliation. ensure_account is idempotent + # and already ran inside sync_provider_to_wealthfolio; this is a + # cheap re-lookup that returns the same UUID. + wf_uuid = await sink.ensure_account(provider.accounts()[0]) + # Reconciliation: broker truth vs WF truth. 
- wf_qty = await sink.compute_position_qty(ibkr_account_id) + wf_qty = await sink.compute_position_qty(wf_uuid) drift_metrics: list[tuple[str, dict[str, str], float]] = [] for symbol, broker_qty in provider.open_positions(): drift = broker_qty - wf_qty.get(symbol, Decimal(0)) diff --git a/broker_sync/providers/ibkr.py b/broker_sync/providers/ibkr.py index 741c79a..fcff89f 100644 --- a/broker_sync/providers/ibkr.py +++ b/broker_sync/providers/ibkr.py @@ -179,12 +179,14 @@ class IBKRProvider: *, token: str, query_id: str, - wf_account_id: str, upstream_account_id: str, ) -> None: self._token = token self._query_id = query_id - self._wf_account_id = wf_account_id + # Single source of truth — the IBKR account number (e.g. U13279690). + # The pipeline's _ensure_accounts() resolves this to a Wealthfolio + # UUID via (provider="ibkr", providerAccountId=upstream_account_id); + # activities are remapped to the WF UUID before import. self._upstream_account_id = upstream_account_id # Stashed for the reconciliation step after fetch() drains. 
self._last_response: Any = None @@ -192,7 +194,7 @@ class IBKRProvider: def accounts(self) -> list[Account]: return [ Account( - id=self._wf_account_id, + id=self._upstream_account_id, name="Interactive Brokers (UK)", account_type=AccountType.GIA, currency="GBP", # FX-aware per-trade; account ccy is GBP @@ -232,10 +234,10 @@ class IBKRProvider: ) for trade in stmt.Trades or []: - yield _map_trade_to_activity(trade, account_id=self._wf_account_id) + yield _map_trade_to_activity(trade, account_id=self._upstream_account_id) for cash in stmt.CashTransactions or []: - activity = _map_cash_to_activity(cash, account_id=self._wf_account_id) + activity = _map_cash_to_activity(cash, account_id=self._upstream_account_id) if activity is not None: yield activity diff --git a/tests/providers/test_ibkr.py b/tests/providers/test_ibkr.py index ea83e26..8dfba07 100644 --- a/tests/providers/test_ibkr.py +++ b/tests/providers/test_ibkr.py @@ -145,7 +145,6 @@ async def test_ibkr_provider_fetch_returns_mapped_activities( provider = IBKRProvider( token="t", query_id="q", - wf_account_id="wf-acct", upstream_account_id="U12345678", ) activities = [a async for a in provider.fetch()] @@ -168,7 +167,6 @@ async def test_ibkr_provider_account_mismatch_raises( provider = IBKRProvider( token="t", query_id="q", - wf_account_id="wf-acct", upstream_account_id="U99999999", # WRONG ) with pytest.raises(IBKRAccountMismatchError, match="U12345678"): @@ -188,7 +186,6 @@ async def test_ibkr_provider_open_positions_after_fetch( provider = IBKRProvider( token="t", query_id="q", - wf_account_id="wf-acct", upstream_account_id="U12345678", ) # drain the iterator before reading positions From bb9e0d4567d5e1c9d3ba4813363d367218badf54 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Wed, 27 May 2026 09:28:42 +0000 Subject: [PATCH 39/44] docs(ibkr): use Last 365 Days (no 'Last 90 Days' preset in IBKR UI) --- docs/plans/2026-05-26-ibkr-flex-ingestion.md | 2 +- docs/providers/ibkr.md | 2 +- 
docs/specs/2026-05-26-ibkr-ingest-design.md | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/plans/2026-05-26-ibkr-flex-ingestion.md b/docs/plans/2026-05-26-ibkr-flex-ingestion.md index 1fa57bb..f872fa8 100644 --- a/docs/plans/2026-05-26-ibkr-flex-ingestion.md +++ b/docs/plans/2026-05-26-ibkr-flex-ingestion.md @@ -1461,7 +1461,7 @@ OpenPositions against WF-computed quantities. | Open Positions | symbol, position, markPrice, currency, assetCategory | | Securities Information | symbol, description, conid | -Date range: `Last 90 Days` — trailing window so a missed cron run +Date range: `Last 365 Days` — trailing window so a missed cron run doesn't lose data. SyncRecordStore makes overlapping pulls idempotent. Switch to `Year to Date` or `Custom Date Range` only for one-time historical backfills. diff --git a/docs/providers/ibkr.md b/docs/providers/ibkr.md index 501c641..14b167b 100644 --- a/docs/providers/ibkr.md +++ b/docs/providers/ibkr.md @@ -39,7 +39,7 @@ a new query named `broker-sync-activity` with: **Date Format:** `yyyy-MM-dd`. **Time Format:** `HH:mm:ss` (no timezone suffix — ibflex 1.1 rejects timezone abbreviations in the time field). -**Date Range:** `Last 90 Days` — trailing window so a missed cron run +**Date Range:** `Last 365 Days` — trailing window so a missed cron run doesn't lose data. SyncRecordStore (keyed by `external_id`) makes overlapping pulls idempotent. For a one-off historical backfill, widen temporarily to `Year to Date` or `Custom Date Range`, run once, then diff --git a/docs/specs/2026-05-26-ibkr-ingest-design.md b/docs/specs/2026-05-26-ibkr-ingest-design.md index 21aaf12..9a7a813 100644 --- a/docs/specs/2026-05-26-ibkr-ingest-design.md +++ b/docs/specs/2026-05-26-ibkr-ingest-design.md @@ -210,7 +210,7 @@ top. 
- Sections: `Account Information`, `Trades`, `Cash Transactions`, `Open Positions`, `Securities Information` - Date Format: `yyyy-MM-dd` · Time Format: `HH:mm:ss TimeZone` - - Date Range: `Last 90 Days` — trailing window so a missed cron run + - Date Range: `Last 365 Days` — trailing window so a missed cron run (failed pod, outage, vacation) doesn't lose data. SyncRecordStore keys on `ibkr:trade:` / `ibkr:cash:`, so overlapping pulls are no-ops. `Last Business Day` was the original @@ -252,11 +252,11 @@ curl -sS -b /tmp/wf-jar -X POST "$WF_BASE_URL/api/v1/accounts" \ ### Step 4 — Initial backfill (skip while account is empty) When the IBKR account first holds positions, the daily CronJob will -backfill automatically up to the 90-day trailing window. For older +backfill automatically up to the 365-day trailing window. For older history, temporarily switch the Flex query Date Range to `Year to Date` (or `Custom Date Range` with a 1-year window), run the CronJob manually once, verify WF totals match the broker app, then -switch the Flex query back to `Last 90 Days` for daily incremental. +switch the Flex query back to `Last 365 Days` for daily incremental. Dedup makes the temporary widening safe — already-synced rows are no-ops. ### Step 5 — Deploy From abf9fa7cb54728594bae368b6558994a780f3573 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Wed, 27 May 2026 09:40:56 +0000 Subject: [PATCH 40/44] parsers/schwab: drop dead vest-release path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The _parse_vest_release path and _VEST_*_RE regexes never matched a real email in 4 years of inbox history (2022-08 → 2026-05, 188 Schwab emails surveyed). Schwab Stock Plan Services does not email release confirmations to the employee address for the workplace account — only the sell-to-cover trade-executed alert lands. Vest data must come from the META payslip via payslip-ingest (tracked as code-fqgr). 
Removed: - _VEST_SUBJECT_RE + 5 _VEST_*_RE regexes (heuristic, never validated) - _parse_vest_release function - VestParseResult dataclass - parse_schwab_email_full wrapper - _search_group helper (only used by vest path) - 3 dead tests + the _VEST_RELEASE fixture Kept models.VestEvent — the payslip→Wealthfolio sink in code-fqgr will need it. Co-Authored-By: Claude Opus 4.7 --- broker_sync/providers/parsers/schwab.py | 193 ++---------------------- tests/providers/parsers/test_schwab.py | 56 ------- 2 files changed, 13 insertions(+), 236 deletions(-) diff --git a/broker_sync/providers/parsers/schwab.py b/broker_sync/providers/parsers/schwab.py index aeef7d0..762a613 100644 --- a/broker_sync/providers/parsers/schwab.py +++ b/broker_sync/providers/parsers/schwab.py @@ -1,79 +1,37 @@ """Schwab workplace-RSU email parser. -Two email shapes are handled: +Schwab Stock Plan Services sends a "Your trade was executed" email for +each sell-to-cover trade (and any user-initiated trade) on the workplace +account. The body has five `` +cells holding date / direction / quantity / ticker / price. -1. Trade confirmations (sell-to-cover or user-initiated trades): HTML - with five `` cells - holding date / direction / quantity / ticker / price. → one Activity. +It does NOT email vest-release / Release Confirmation messages to the +employee address for this account (verified against 4 years of inbox +history, 2022-2026 — see infra/docs in code-fqgr). Vest data must come +from the META payslip via payslip-ingest, not from email. The whole +vest-release parser that used to live here was dead code. -2. Release Confirmations (RSU vest events): subject/body mentions - "Release Confirmation" or "Award Vesting"; body lists vest date, - shares released, FMV, shares sold to cover, and USD tax withheld. - → (Activity, Activity, VestEvent) tuple: the gross vest (BUY at FMV), - the sell-to-cover (SELL at FMV), and a standalone VestEvent for the - payslip-ingest reconciliation pipeline. 
- -On any parse failure we return the neutral empty result (no Activities, -no VestEvent) — an unparseable email shouldn't crash the IMAP batch. +On any parse failure we return an empty list — an unparseable email +shouldn't crash the IMAP batch. """ from __future__ import annotations import logging -import re -from dataclasses import dataclass from decimal import Decimal, InvalidOperation from bs4 import BeautifulSoup from dateutil import parser as dateparser -from broker_sync.models import AccountType, Activity, ActivityType, VestEvent +from broker_sync.models import AccountType, Activity, ActivityType log = logging.getLogger(__name__) _ACCOUNT_ID = "schwab-workplace" _DEFAULT_CURRENCY = "USD" -# Vest-confirmation emails reliably include one of these phrases. Matching -# is case-insensitive and on the raw HTML (cheap — no DOM parse needed). -_VEST_SUBJECT_RE = re.compile(r"Release Confirmation|Award Vesting|RSU Release", - re.IGNORECASE) - - -@dataclass -class VestParseResult: - activities: list[Activity] - vest_event: VestEvent | None - def parse_schwab_email(raw_html: str) -> list[Activity]: - """Return a single-item list of Activity on success, empty on failure. - - For vest-confirmation emails, returns the two Activity rows (gross - vest + sell-to-cover). Use `parse_schwab_email_full` when the caller - also needs the VestEvent. - """ - return parse_schwab_email_full(raw_html).activities - - -def parse_schwab_email_full(raw_html: str) -> VestParseResult: - """Full parse — returns activities + optional VestEvent. - - Dispatches: vest-confirmation emails → `_parse_vest_release`; - everything else → the legacy single-row confirmation parser. 
- """ - if _VEST_SUBJECT_RE.search(raw_html): - result = _parse_vest_release(raw_html) - if result is not None: - return result - log.warning("schwab: detected vest email but could not extract fields; " - "add a real fixture to broker-sync/tests/fixtures/") - return VestParseResult(activities=[], vest_event=None) - - return VestParseResult(activities=_parse_trade_confirmation(raw_html), vest_event=None) - - -def _parse_trade_confirmation(raw_html: str) -> list[Activity]: - """Legacy 5-cell trade confirmation parser.""" + """Return a one-element list of Activity on success, empty on failure.""" try: soup = BeautifulSoup(raw_html, "html.parser") cells = [ @@ -90,8 +48,6 @@ def _parse_trade_confirmation(raw_html: str) -> list[Activity]: direction = (ActivityType.SELL if direction_txt.strip().lower() == "sold" else ActivityType.BUY) quantity = Decimal(qty_txt.replace(",", "").strip()) - # Price like "$123.45" — strip the currency sign and parse the numeric tail. - # Handle "£", "€", "USD", etc. by taking the last numeric span. price_clean = price_txt for sign in ("$", "£", "€", "USD", "GBP", "EUR"): price_clean = price_clean.replace(sign, "") @@ -115,126 +71,3 @@ def _parse_trade_confirmation(raw_html: str) -> list[Activity]: ] except (ValueError, InvalidOperation, IndexError, AttributeError): return [] - - -# Heuristic extractors for vest-release emails. Labels observed in public -# Schwab RSU release samples; real fixture needed to tighten these. 
-_VEST_DATE_RE = re.compile( - r"(?:Release Date|Vest Date|Vesting Date)\s*[:<][^0-9]*" - r"(\d{1,2}[\s/\-][A-Za-z]{3}[\s/\-]\d{2,4}|\d{2}/\d{2}/\d{4}|\d{4}-\d{2}-\d{2})", - re.IGNORECASE) -_VEST_TICKER_RE = re.compile(r"(?:Ticker|Symbol)\s*[:<]\s*([A-Z]{2,5})", - re.IGNORECASE) -_VEST_SHARES_RELEASED_RE = re.compile( - r"(?:Shares Released|Total Shares (?:Released|Vested))\s*[:<]\s*" - r"([\d,]+(?:\.\d+)?)", - re.IGNORECASE) -_VEST_SHARES_WITHHELD_RE = re.compile( - r"(?:Shares (?:Withheld|Sold)(?: for Taxes)?)\s*[:<]\s*" - r"([\d,]+(?:\.\d+)?)", - re.IGNORECASE) -_VEST_FMV_RE = re.compile( - r"(?:Market Price|FMV|Fair Market Value)\s*[:<]\s*" - r"\$?\s*([\d,]+(?:\.\d+)?)", - re.IGNORECASE) -_VEST_TAX_USD_RE = re.compile( - r"(?:Tax Withholding Amount|Total Tax Withholding|Tax Withheld)\s*[:<]\s*" - r"\$?\s*([\d,]+(?:\.\d+)?)", - re.IGNORECASE) - - -def _parse_vest_release(raw_html: str) -> VestParseResult | None: - """Best-effort extraction from a Schwab Release Confirmation email. - - Runs label regexes on the plain-text view of the HTML. Returns None - (signalling fall-through) if the core four fields (date, ticker, - shares released, FMV) don't all resolve — that's a strong signal the - heuristics need a real fixture before they can be trusted on a live - email. 
- """ - try: - soup = BeautifulSoup(raw_html, "html.parser") - text = soup.get_text(" ", strip=True) - except Exception: - return None - - date_str = _search_group(_VEST_DATE_RE, text) - ticker = _search_group(_VEST_TICKER_RE, text) - shares_released_str = _search_group(_VEST_SHARES_RELEASED_RE, text) - fmv_str = _search_group(_VEST_FMV_RE, text) - if not (date_str and ticker and shares_released_str and fmv_str): - return None - - try: - vest_date = dateparser.parse(date_str) - shares_vested = Decimal(shares_released_str.replace(",", "")) - fmv = Decimal(fmv_str.replace(",", "")) - except (ValueError, InvalidOperation): - return None - - shares_sold_str = _search_group(_VEST_SHARES_WITHHELD_RE, text) - shares_sold_to_cover = (Decimal(shares_sold_str.replace(",", "")) - if shares_sold_str else None) - tax_usd_str = _search_group(_VEST_TAX_USD_RE, text) - tax_withheld_usd = (Decimal(tax_usd_str.replace(",", "")) - if tax_usd_str else None) - - external_id = (f"schwab:{vest_date.date().isoformat()}:{ticker}:VEST:" - f"{shares_vested}") - - vest_event = VestEvent( - external_id=external_id, - vest_date=vest_date, - ticker=ticker, - shares_vested=shares_vested, - shares_sold_to_cover=shares_sold_to_cover, - fmv_at_vest_usd=fmv, - tax_withheld_usd=tax_withheld_usd, - source="schwab_email", - raw={ - "date": date_str, - "ticker": ticker, - "shares_released": shares_released_str, - "fmv": fmv_str, - "shares_withheld": shares_sold_str or "", - "tax_withheld": tax_usd_str or "", - }, - ) - - # Sibling Activities for Wealthfolio: full vest as BUY, sell-to-cover - # slice as SELL, both at the same FMV so net cash = 0 on that day. 
- activities: list[Activity] = [ - Activity( - external_id=f"{external_id}:BUY", - account_id=_ACCOUNT_ID, - account_type=AccountType.GIA, - date=vest_date, - activity_type=ActivityType.BUY, - symbol=ticker, - quantity=shares_vested, - unit_price=fmv, - currency=_DEFAULT_CURRENCY, - notes="schwab-vest-release", - ) - ] - if shares_sold_to_cover is not None and shares_sold_to_cover > 0: - activities.append( - Activity( - external_id=f"{external_id}:SELL_TO_COVER", - account_id=_ACCOUNT_ID, - account_type=AccountType.GIA, - date=vest_date, - activity_type=ActivityType.SELL, - symbol=ticker, - quantity=shares_sold_to_cover, - unit_price=fmv, - currency=_DEFAULT_CURRENCY, - notes="schwab-sell-to-cover", - )) - - return VestParseResult(activities=activities, vest_event=vest_event) - - -def _search_group(pattern: re.Pattern[str], text: str) -> str | None: - m = pattern.search(text) - return m.group(1).strip() if m else None diff --git a/tests/providers/parsers/test_schwab.py b/tests/providers/parsers/test_schwab.py index c39bd0c..8e3c736 100644 --- a/tests/providers/parsers/test_schwab.py +++ b/tests/providers/parsers/test_schwab.py @@ -82,59 +82,3 @@ def test_price_with_commas_parses() -> None: html = _SELL.replace("$612.34", "$1,612.34") a = parse_schwab_email(html)[0] assert a.unit_price == Decimal("1612.34") - - -# --- Vest-release parsing ------------------------------------------------- - -_VEST_RELEASE = """ -

Release Confirmation

-

-Release Date: 15 Mar 2026 -Ticker: META -Total Shares Released: 100.0 -Market Price: $612.34 -Shares Withheld for Taxes: 45 -Tax Withholding Amount: $27,555.30 -

-""" - - -def test_vest_release_returns_two_activities_and_vest_event() -> None: - """Release Confirmation yields a BUY (full vest) + SELL (sell-to-cover) + VestEvent.""" - from broker_sync.providers.parsers.schwab import parse_schwab_email_full - - result = parse_schwab_email_full(_VEST_RELEASE) - assert result.vest_event is not None - assert result.vest_event.ticker == "META" - assert result.vest_event.shares_vested == Decimal("100.0") - assert result.vest_event.shares_sold_to_cover == Decimal("45") - assert result.vest_event.fmv_at_vest_usd == Decimal("612.34") - assert result.vest_event.tax_withheld_usd == Decimal("27555.30") - assert result.vest_event.vest_date.date().isoformat() == "2026-03-15" - assert result.vest_event.external_id.startswith("schwab:2026-03-15:META:VEST:") - - assert len(result.activities) == 2 - buy = result.activities[0] - assert buy.activity_type is ActivityType.BUY - assert buy.quantity == Decimal("100.0") - sell = result.activities[1] - assert sell.activity_type is ActivityType.SELL - assert sell.quantity == Decimal("45") - assert sell.unit_price == Decimal("612.34") - - -def test_vest_email_with_unparseable_body_returns_empty() -> None: - """Subject says Release Confirmation but fields missing → empty result, no crash.""" - from broker_sync.providers.parsers.schwab import parse_schwab_email_full - - html = "Release Confirmation — please contact support" - result = parse_schwab_email_full(html) - assert result.vest_event is None - assert result.activities == [] - - -def test_back_compat_parse_schwab_email_drops_vest_event() -> None: - """The legacy list[Activity] shape remains stable for existing callers.""" - acts = parse_schwab_email(_VEST_RELEASE) - assert len(acts) == 2 - assert all(isinstance(a.activity_type, ActivityType) for a in acts) From 17c2a69c6c491856c9d34d1b9da9724723e04478 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Wed, 27 May 2026 10:02:07 +0000 Subject: [PATCH 41/44] parsers/schwab: emit paired BUY for recent 
SELL (vest synthesis) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Schwab Stock Plan Services doesn't email vest-release confirmations to the employee inbox — only the same-day-sell trade-executed alert lands. The vest itself was invisible to broker-sync, so the META cadence panel in the wealth dashboard has been missing the May 2026 vest BUY and would keep missing every future vest. Synthesis: when a SELL email's trade date is on/after the configured boundary (default 2026-04-01), also emit a paired BUY with identical date/qty/price/symbol. Notes link the pair via the SELL's external_id. Verified true across 14 historical vests — 100% same-day-sell pattern, SELL qty == vest qty. Boundary stops the synthesis from back-filling vests prior to 2026-04 which already have csv-sourced BUY rows in Wealthfolio from the historical one-shot backfill (last vest 2026-02-18). The csv BUYs and inferred BUYs have distinct external_ids, so re-running against old emails would double-count without this guard. Override via env var `SCHWAB_VEST_INFER_FROM_DATE=yyyy-mm-dd` on the broker-sync-imap cron. Tests: 4 new cases — recent SELL pairs, old SELL doesn't pair, env override works, BUY-direction emails (rare) don't get paired. Co-Authored-By: Claude Opus 4.7 --- broker_sync/providers/parsers/schwab.py | 90 +++++++++++++++++++------ tests/providers/parsers/test_schwab.py | 65 +++++++++++++++++- 2 files changed, 134 insertions(+), 21 deletions(-) diff --git a/broker_sync/providers/parsers/schwab.py b/broker_sync/providers/parsers/schwab.py index 762a613..5a34f1b 100644 --- a/broker_sync/providers/parsers/schwab.py +++ b/broker_sync/providers/parsers/schwab.py @@ -7,9 +7,16 @@ cells holding date / direction / quantity / ticker / price. It does NOT email vest-release / Release Confirmation messages to the employee address for this account (verified against 4 years of inbox -history, 2022-2026 — see infra/docs in code-fqgr). 
Vest data must come -from the META payslip via payslip-ingest, not from email. The whole -vest-release parser that used to live here was dead code. +history, 2022-2026). The vest itself is invisible to IMAP. + +Same-day-sell synthesis: Meta RSUs vest and are sold the same day at +the same FMV (verified across 14 historical vests). When a SELL email +is parsed AND its trade date is on or after `VEST_INFER_FROM_DATE`, +we ALSO emit a paired BUY representing the underlying vest event — +same date, same quantity, same price. The date boundary stops this +back-filling historical vests that already have csv-sourced BUY rows +in Wealthfolio (which would duplicate at chart-level despite distinct +external_ids). On any parse failure we return an empty list — an unparseable email shouldn't crash the IMAP batch. @@ -17,6 +24,8 @@ shouldn't crash the IMAP batch. from __future__ import annotations import logging +import os +from datetime import date, datetime from decimal import Decimal, InvalidOperation from bs4 import BeautifulSoup @@ -29,9 +38,34 @@ log = logging.getLogger(__name__) _ACCOUNT_ID = "schwab-workplace" _DEFAULT_CURRENCY = "USD" +# Inferred-BUY synthesis boundary. SELL emails on or after this date +# emit a paired BUY for the underlying vest; earlier ones do not (they +# already have csv-sourced BUYs in Wealthfolio from the one-shot +# historical backfill, last vest 2026-02-18). Override at runtime with +# the env var if a different cutover is needed. ISO-8601 yyyy-mm-dd. 
+_DEFAULT_VEST_INFER_FROM = "2026-04-01" + + +def _vest_infer_from() -> date: + raw = os.environ.get("SCHWAB_VEST_INFER_FROM_DATE", _DEFAULT_VEST_INFER_FROM).strip() + try: + return datetime.strptime(raw, "%Y-%m-%d").date() + except ValueError: + log.warning( + "SCHWAB_VEST_INFER_FROM_DATE=%r is not yyyy-mm-dd; using default %s", + raw, _DEFAULT_VEST_INFER_FROM, + ) + return datetime.strptime(_DEFAULT_VEST_INFER_FROM, "%Y-%m-%d").date() + def parse_schwab_email(raw_html: str) -> list[Activity]: - """Return a one-element list of Activity on success, empty on failure.""" + """Return Activities for a Schwab trade-executed email. + + Returns: empty list on parse failure; one Activity for a BUY-direction + email (rare — the workplace account is essentially sell-only); for a + SELL email, returns [SELL] plus an inferred paired BUY (=vest event) + when the trade date is on or after the synthesis-boundary date. + """ try: soup = BeautifulSoup(raw_html, "html.parser") cells = [ @@ -52,22 +86,40 @@ def parse_schwab_email(raw_html: str) -> list[Activity]: for sign in ("$", "£", "€", "USD", "GBP", "EUR"): price_clean = price_clean.replace(sign, "") unit_price = Decimal(price_clean.replace(",", "").strip()) + ticker_clean = ticker.strip() - external_id = (f"schwab:{trade_date.date().isoformat()}:{ticker}:" + external_id = (f"schwab:{trade_date.date().isoformat()}:{ticker_clean}:" f"{direction.value}:{quantity}") - return [ - Activity( - external_id=external_id, - account_id=_ACCOUNT_ID, - account_type=AccountType.GIA, - date=trade_date, - activity_type=direction, - symbol=ticker.strip(), - quantity=quantity, - unit_price=unit_price, - currency=_DEFAULT_CURRENCY, - notes=f"schwab-email:{direction_txt}", - ) - ] + primary = Activity( + external_id=external_id, + account_id=_ACCOUNT_ID, + account_type=AccountType.GIA, + date=trade_date, + activity_type=direction, + symbol=ticker_clean, + quantity=quantity, + unit_price=unit_price, + currency=_DEFAULT_CURRENCY, + 
notes=f"schwab-email:{direction_txt}", + ) + + if direction is not ActivityType.SELL or trade_date.date() < _vest_infer_from(): + return [primary] + + inferred_buy = Activity( + external_id=(f"schwab:vest:{trade_date.date().isoformat()}:" + f"{ticker_clean}:BUY:{quantity}"), + account_id=_ACCOUNT_ID, + account_type=AccountType.GIA, + date=trade_date, + activity_type=ActivityType.BUY, + symbol=ticker_clean, + quantity=quantity, + unit_price=unit_price, + currency=_DEFAULT_CURRENCY, + notes=(f"schwab-vest-inferred-from-same-day-sell | " + f"paired_sell_external_id={external_id}"), + ) + return [inferred_buy, primary] except (ValueError, InvalidOperation, IndexError, AttributeError): return [] diff --git a/tests/providers/parsers/test_schwab.py b/tests/providers/parsers/test_schwab.py index 8e3c736..2cc0213 100644 --- a/tests/providers/parsers/test_schwab.py +++ b/tests/providers/parsers/test_schwab.py @@ -80,5 +80,66 @@ def test_external_id_is_stable_across_reruns() -> None: def test_price_with_commas_parses() -> None: html = _SELL.replace("$612.34", "$1,612.34") - a = parse_schwab_email(html)[0] - assert a.unit_price == Decimal("1612.34") + # The first activity is the inferred BUY (date 2025-01-23 ≥ 2026-04-01? no → + # only one activity for this old-dated email), so index 0 is the SELL. + acts = parse_schwab_email(html) + sell = next(a for a in acts if a.activity_type is ActivityType.SELL) + assert sell.unit_price == Decimal("1612.34") + + +# --- Inferred vest BUY --------------------------------------------------- + + +def _recent_sell(date_iso: str = "2026-05-19", qty: str = "55", price: str = "609.35") -> str: + return f""" + + + + + + +
{date_iso}
Sold
{qty}
META
${price}
+""" + + +def test_recent_sell_emits_paired_buy() -> None: + """SELL dated on/after the synthesis boundary triggers a paired BUY.""" + acts = parse_schwab_email(_recent_sell()) + assert len(acts) == 2 + + buy = next(a for a in acts if a.activity_type is ActivityType.BUY) + sell = next(a for a in acts if a.activity_type is ActivityType.SELL) + + assert buy.quantity == sell.quantity == Decimal("55") + assert buy.unit_price == sell.unit_price == Decimal("609.35") + assert buy.date == sell.date + assert buy.symbol == sell.symbol == "META" + assert "schwab-vest-inferred-from-same-day-sell" in (buy.notes or "") + assert buy.external_id == "schwab:vest:2026-05-19:META:BUY:55" + assert sell.external_id == "schwab:2026-05-19:META:SELL:55" + + +def test_old_sell_emits_only_sell() -> None: + """SELL dated before 2026-04-01 (default boundary) skips the paired BUY — + those vests already have csv-sourced BUY rows in Wealthfolio.""" + acts = parse_schwab_email(_recent_sell(date_iso="2025-08-19")) + assert len(acts) == 1 + assert acts[0].activity_type is ActivityType.SELL + + +def test_boundary_env_var_overrides(monkeypatch: object) -> None: + """The synthesis boundary is configurable via env var.""" + import os + os.environ["SCHWAB_VEST_INFER_FROM_DATE"] = "2025-01-01" + try: + acts = parse_schwab_email(_recent_sell(date_iso="2025-08-19")) + assert len(acts) == 2 # now in scope + finally: + del os.environ["SCHWAB_VEST_INFER_FROM_DATE"] + + +def test_buy_email_does_not_emit_inferred_buy() -> None: + """BUY-direction emails (rare for workplace account) don't get paired.""" + acts = parse_schwab_email(_BUY.replace("2024-11-15", "2026-05-15")) + assert len(acts) == 1 + assert acts[0].activity_type is ActivityType.BUY From 3427f5c9e192ef2d6e3f840e042c896cfc180374 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Wed, 27 May 2026 12:03:41 +0000 Subject: [PATCH 42/44] ibkr: emit ibkr_cash_balance{currency, account} per CashReport row Each daily run now pushes one Pushgateway metric 
per currency row from the Flex Activity Query's CashReport section (typically BASE_SUMMARY aggregate + one row per held currency). Makes dormant-account balance checks trivial and adds a Grafana surface for cash drift alerting. Requires the Activity Flex Query in IBKR Client Portal to have the CashReport section enabled. Co-Authored-By: Claude Opus 4.7 --- broker_sync/cli.py | 10 ++++++++++ broker_sync/providers/ibkr.py | 17 +++++++++++++++++ tests/fixtures/ibkr/sample_flex.xml | 4 ++++ tests/providers/test_ibkr.py | 28 ++++++++++++++++++++++++++++ 4 files changed, 59 insertions(+) diff --git a/broker_sync/cli.py b/broker_sync/cli.py index 7f855f5..64057f7 100644 --- a/broker_sync/cli.py +++ b/broker_sync/cli.py @@ -310,6 +310,16 @@ def ibkr( float(drift), ) ) + # Cash balances (one row per currency from CashReport, plus a + # BASE_SUMMARY row consolidated in account base currency). + for currency, ending_cash in provider.cash_balances(): + drift_metrics.append( + ( + "ibkr_cash_balance", + {"currency": currency, "account": "ibkr-uk"}, + float(ending_cash), + ) + ) drift_metrics.append( ("ibkr_sync_last_success_timestamp_seconds", {}, float(time.time())) ) diff --git a/broker_sync/providers/ibkr.py b/broker_sync/providers/ibkr.py index fcff89f..e180bcb 100644 --- a/broker_sync/providers/ibkr.py +++ b/broker_sync/providers/ibkr.py @@ -257,3 +257,20 @@ class IBKRProvider: ) out.append((symbol, Decimal(str(pos.position)))) return out + + def cash_balances(self) -> list[tuple[str, Decimal]]: + """Return ``[(currency, ending_cash), ...]`` from the CashReport. + + Includes the ``BASE_SUMMARY`` aggregate row (account base currency + consolidated) plus any per-currency rows. Empty list if no + CashReport section in the Flex query or before first ``fetch()``. 
+ """ + if self._last_response is None: + return [] + stmt = self._last_response.FlexStatements[0] + out: list[tuple[str, Decimal]] = [] + for row in stmt.CashReport or []: + if row.endingCash is None or row.currency is None: + continue + out.append((str(row.currency), Decimal(str(row.endingCash)))) + return out diff --git a/tests/fixtures/ibkr/sample_flex.xml b/tests/fixtures/ibkr/sample_flex.xml index 0d82fcf..d3130a3 100644 --- a/tests/fixtures/ibkr/sample_flex.xml +++ b/tests/fixtures/ibkr/sample_flex.xml @@ -16,6 +16,10 @@ + + + + diff --git a/tests/providers/test_ibkr.py b/tests/providers/test_ibkr.py index 8dfba07..edbc51d 100644 --- a/tests/providers/test_ibkr.py +++ b/tests/providers/test_ibkr.py @@ -194,3 +194,31 @@ async def test_ibkr_provider_open_positions_after_fetch( positions = provider.open_positions() # VUAG → VUAG.L (LSE inferred from GBP); AAPL unchanged (USD) assert dict(positions) == {"VUAG.L": Decimal("8"), "AAPL": Decimal("5")} + + +async def test_ibkr_provider_cash_balances_after_fetch( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """cash_balances() returns (currency, ending_cash) tuples from CashReport.""" + from ibflex import client as ib_client + + with open("tests/fixtures/ibkr/sample_flex.xml", "rb") as f: + xml_bytes = f.read() + monkeypatch.setattr(ib_client, "download", lambda *a, **kw: xml_bytes) + + provider = IBKRProvider( + token="t", + query_id="q", + upstream_account_id="U12345678", + ) + [a async for a in provider.fetch()] + + balances = provider.cash_balances() + # Fixture has BASE_SUMMARY + USD rows, both 1.23 + assert dict(balances) == {"BASE_SUMMARY": Decimal("1.23"), "USD": Decimal("1.23")} + + +def test_ibkr_provider_cash_balances_before_fetch_returns_empty() -> None: + """No CashReport data before fetch().""" + provider = IBKRProvider(token="t", query_id="q", upstream_account_id="U12345678") + assert provider.cash_balances() == [] From 0d23487608363a13bba0f5bf94457867f46a78e0 Mon Sep 17 00:00:00 2001 From: Viktor 
Barzin Date: Wed, 27 May 2026 17:24:54 +0000 Subject: [PATCH 43/44] imap: skip InvestEngine by default; opt back in via INCLUDE env MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Post-mortem 2026-05-27: 39 IMAP-source IE BUYs + their cash-flow DEPOSITs were re-inserted into Wealthfolio at 09:22:18 UTC, exactly the rows the £252k dedup removed the previous day. The cron's BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS=invest-engine env var did its job (cron logged ie_skipped=53), but some other entry point — kubectl run, poetry run on the devvm, or a sibling agent session — ran the IMAP ingest WITHOUT that env. The opt-out was a foot-gun. This change makes the IE-via-IMAP safety STRUCTURAL: `invest-engine` is in the default exclude set inside _resolve_excluded_providers(). Any code path now skips IE unless the caller explicitly sets `BROKER_SYNC_IMAP_INCLUDE_PROVIDERS=invest-engine`. The `BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS` env still works (additive) for forward-compat in case Schwab etc. ever need similar treatment. INCLUDE wins over both the default exclude set and EXCLUDE env. Co-Authored-By: Claude Opus 4.7 --- broker_sync/providers/imap.py | 39 ++++++++++++++++--- tests/providers/test_imap.py | 72 +++++++++++++++++++++++++++++------ 2 files changed, 94 insertions(+), 17 deletions(-) diff --git a/broker_sync/providers/imap.py b/broker_sync/providers/imap.py index 5564dd3..0b9bbb7 100644 --- a/broker_sync/providers/imap.py +++ b/broker_sync/providers/imap.py @@ -151,14 +151,41 @@ def _fetch_all(creds: ImapCreds) -> Iterator[bytes]: yield raw -def fetch_activities(creds: ImapCreds) -> list[Activity]: - out: list[Activity] = [] - ie_parsed = schwab_parsed = ie_skipped = skipped = 0 - exclude = { - p.strip().lower() +def _resolve_excluded_providers() -> set[str]: + """Return the set of providers the IMAP fetcher must skip. 
+ + Default-exclude list is structural — `invest-engine` is ALWAYS skipped + unless explicitly opted back in via `BROKER_SYNC_IMAP_INCLUDE_PROVIDERS`. + This protects against accidental re-ingestion via any code path that + doesn't set the cron's env (e.g. `kubectl run --rm`, devvm `poetry run`, + a sibling agent session). See post-mortem 2026-05-27 — the IMAP path + re-inserted 39 IE BUYs that had been deduped the previous day, because + the safety lived only on the cronjob spec. + + Additional providers can be excluded via + `BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS`. `INCLUDE` always wins over + `EXCLUDE` and the default skip-list. + """ + _DEFAULT_EXCLUDED = {"invest-engine", "invest_engine"} + extra = { + p.strip().lower().replace("_", "-") for p in os.environ.get("BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS", "").split(",") if p.strip() } + include = { + p.strip().lower().replace("_", "-") + for p in os.environ.get("BROKER_SYNC_IMAP_INCLUDE_PROVIDERS", "").split(",") + if p.strip() + } + # Canonicalise the default set under the same key normalisation. 
+ canonical = {p.replace("_", "-") for p in _DEFAULT_EXCLUDED} + return (canonical | extra) - include + + +def fetch_activities(creds: ImapCreds) -> list[Activity]: + out: list[Activity] = [] + ie_parsed = schwab_parsed = ie_skipped = skipped = 0 + exclude = _resolve_excluded_providers() for raw in _fetch_all(creds): try: msg = email.message_from_bytes(raw) @@ -167,7 +194,7 @@ def fetch_activities(creds: ImapCreds) -> list[Activity]: continue sender = _extract_sender(msg) if sender in _IE_SENDERS or sender.endswith("@investengine.com"): - if "invest-engine" in exclude or "invest_engine" in exclude: + if "invest-engine" in exclude: ie_skipped += 1 continue out.extend(ie_parser.parse_invest_engine_email(raw)) diff --git a/tests/providers/test_imap.py b/tests/providers/test_imap.py index 0264b37..30b09d1 100644 --- a/tests/providers/test_imap.py +++ b/tests/providers/test_imap.py @@ -105,10 +105,15 @@ def test_non_ie_activities_passed_through_unchanged() -> None: assert routed[0].account_type is AccountType.GIA -def test_exclude_invest_engine_skips_ie_emails(monkeypatch: MonkeyPatch) -> None: - """BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS=invest-engine should skip IE messages - so we don't duplicate IE buys already ingested via the bearer-token API path. - Schwab routing must remain unaffected.""" +def test_invest_engine_skipped_by_default(monkeypatch: MonkeyPatch) -> None: + """InvestEngine messages MUST be skipped by default, even with no env set. + + Post-mortem 2026-05-27: any code path that doesn't set the cron's env + (e.g. `kubectl run --rm` or devvm `poetry run`) was re-importing IE + BUYs through this IMAP path. The opt-out env var was a foot-gun. + Invariant now: structural default skip; opt back in only with + BROKER_SYNC_IMAP_INCLUDE_PROVIDERS. 
+ """ from broker_sync.providers import imap as imap_mod from broker_sync.providers.parsers import invest_engine as ie_parser @@ -130,15 +135,60 @@ def test_exclude_invest_engine_skips_ie_emails(monkeypatch: MonkeyPatch) -> None creds = imap_mod.ImapCreds(host="h", user="u", password="p", directory="d") - monkeypatch.setenv("BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS", "invest-engine") - out_excluded = imap_mod.fetch_activities(creds) - # IE skipped → only the schwab activity is emitted - assert len(out_excluded) == 1 - + # Default (no env): IE skipped, Schwab parsed. monkeypatch.delenv("BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS", raising=False) + monkeypatch.delenv("BROKER_SYNC_IMAP_INCLUDE_PROVIDERS", raising=False) out_default = imap_mod.fetch_activities(creds) - # Both providers fire when env unset - assert len(out_default) == 2 + assert len(out_default) == 1, "IE must be skipped by default; only Schwab emitted" + + +def test_invest_engine_opt_in_via_include_env(monkeypatch: MonkeyPatch) -> None: + """Setting BROKER_SYNC_IMAP_INCLUDE_PROVIDERS=invest-engine re-enables + IE parsing (escape hatch for the legacy IMAP path).""" + from broker_sync.providers import imap as imap_mod + from broker_sync.providers.parsers import invest_engine as ie_parser + + ie_email = b"From: noreply@investengine.com\r\n\r\nirrelevant\r\n" + schwab_email = b"From: donotreply@schwab.com\r\n\r\n\r\n" + monkeypatch.setattr(imap_mod, "_fetch_all", lambda _: [ie_email, schwab_email]) + monkeypatch.setattr(ie_parser, "parse_invest_engine_email", lambda raw: [object()]) + monkeypatch.setattr(imap_mod, "parse_schwab_email", lambda html: [object()]) + + creds = imap_mod.ImapCreds(host="h", user="u", password="p", directory="d") + + monkeypatch.setenv("BROKER_SYNC_IMAP_INCLUDE_PROVIDERS", "invest-engine") + monkeypatch.delenv("BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS", raising=False) + out = imap_mod.fetch_activities(creds) + assert len(out) == 2, "INCLUDE=invest-engine must re-enable IE parsing" + + +def 
test_exclude_schwab_still_works(monkeypatch: MonkeyPatch) -> None: + """EXCLUDE env still works for other providers (forward-compat).""" + from broker_sync.providers import imap as imap_mod + from broker_sync.providers.parsers import invest_engine as ie_parser + + schwab_email = b"From: donotreply@schwab.com\r\n\r\n\r\n" + monkeypatch.setattr(imap_mod, "_fetch_all", lambda _: [schwab_email]) + monkeypatch.setattr(ie_parser, "parse_invest_engine_email", lambda raw: [object()]) + monkeypatch.setattr(imap_mod, "parse_schwab_email", lambda html: [object()]) + + creds = imap_mod.ImapCreds(host="h", user="u", password="p", directory="d") + + monkeypatch.setenv("BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS", "schwab") + monkeypatch.delenv("BROKER_SYNC_IMAP_INCLUDE_PROVIDERS", raising=False) + out = imap_mod.fetch_activities(creds) + assert len(out) == 0, "Schwab must be skipped when in EXCLUDE list" + + +def test_include_overrides_default_and_exclude(monkeypatch: MonkeyPatch) -> None: + """INCLUDE wins over both the structural default and EXCLUDE env var.""" + from broker_sync.providers import imap as imap_mod + + monkeypatch.setenv("BROKER_SYNC_IMAP_EXCLUDE_PROVIDERS", "invest-engine,schwab") + monkeypatch.setenv("BROKER_SYNC_IMAP_INCLUDE_PROVIDERS", "invest-engine") + resolved = imap_mod._resolve_excluded_providers() + assert "invest-engine" not in resolved + assert "schwab" in resolved def test_schwab_subdomain_sender_matches() -> None: From b632d951e4927e47e293e7d65f081ad1e68121ab Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sat, 13 Jun 2026 01:40:07 +0000 Subject: [PATCH 44/44] ci: move image build off-infra to GHA -> ghcr (ADR-0002) Generated by infra/scripts/offinfra-onboard: GHA builds+tests on the GitHub mirror, pushes ghcr.io/viktorbarzin/wealthfolio-sync, then triggers the Woodpecker deploy (repo 0). 
Old in-cluster build pipeline removed: .woodpecker/build.yml Co-Authored-By: Claude Fable 5 --- .github/workflows/build.yml | 103 ++++++++++++++++++++++++++++++++++++ .woodpecker/build.yml | 45 ---------------- 2 files changed, 103 insertions(+), 45 deletions(-) create mode 100644 .github/workflows/build.yml delete mode 100644 .woodpecker/build.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..f592306 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,103 @@ +name: Build and Push + +# Off-infra build (ADR-0002). Canonical repo is Forgejo viktor/broker-sync, which +# push-mirrors here; this workflow builds on GitHub-hosted runners, pushes the +# image to GHCR, then signals the Woodpecker deploy pipeline (repo 0) +# to roll the cluster — the homelab never sees build IO or registry pushes. +# +# Committed on the FORGEJO side (the mirror is one-way; commits made on GitHub +# are overwritten by the next sync). Generated by infra/scripts/offinfra-onboard. +on: + push: + branches: [master] + workflow_dispatch: {} + +permissions: + contents: read + packages: write + +jobs: + lint-and-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + - uses: actions/setup-python@v6 + with: + python-version: "3.12" + - name: Lint + type-check + test + run: | + pip install --no-cache-dir "poetry==1.8.4" + poetry install --no-interaction --no-root + poetry run ruff check . + poetry run mypy broker_sync tests + poetry run pytest -q + + build: + needs: lint-and-test + runs-on: ubuntu-latest + outputs: + image_tag: ${{ steps.meta.outputs.sha }} + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 # full history + tags so svu sees the last vX.Y.Z + fetch-tags: true + # Auto-semver (svu): tag-only, pushed to CANONICAL Forgejo (GitHub tags + # would be wiped by the next mirror sync). Best-effort: never blocks the build. 
+ - name: Compute + tag semver (svu) + env: + FORGEJO_GIT_TOKEN: ${{ secrets.FORGEJO_GIT_TOKEN }} + run: | + set +e + git config user.email "ci@viktorbarzin.me" + git config user.name "broker-sync-ci" + git config --global --add safe.directory "$GITHUB_WORKSPACE" + curl -sSL https://github.com/caarlos0/svu/releases/download/v3.4.1/svu_3.4.1_linux_amd64.tar.gz | tar -xz svu + CUR=$(./svu current 2>/dev/null) + NEXT=$(./svu next 2>/dev/null) + echo "svu current=[$CUR] next=[$NEXT]" + if [ -n "$NEXT" ] && [ "$NEXT" != "$CUR" ]; then + git tag "$NEXT" 2>/dev/null + git push "https://viktor:${FORGEJO_GIT_TOKEN}@forgejo.viktorbarzin.me/viktor/broker-sync.git" "$NEXT" && echo "pushed tag $NEXT to forgejo" || echo "tag push failed (non-blocking)" + fi + exit 0 + - uses: docker/setup-buildx-action@v4 + - uses: docker/login-action@v4 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - id: meta + run: echo "sha=$(echo ${{ github.sha }} | cut -c1-8)" >> "$GITHUB_OUTPUT" + - uses: docker/build-push-action@v7 + with: + context: . + push: true + platforms: linux/amd64 + # Single-manifest images (no provenance/SBOM attestation children) so + # registry retention can never orphan index children (ADR-0002). + provenance: false + tags: | + ghcr.io/viktorbarzin/wealthfolio-sync:${{ steps.meta.outputs.sha }} + ghcr.io/viktorbarzin/wealthfolio-sync:latest + cache-from: type=gha + cache-to: type=gha,mode=max + # Keep the newest ~10 versions on ghcr (latest rides the newest one). 
+ - name: ghcr retention (keep 10) + uses: actions/delete-package-versions@v5 + continue-on-error: true + with: + package-name: wealthfolio-sync + package-type: container + min-versions-to-keep: 10 + + notify-failure: + needs: [lint-and-test, build] + if: failure() + runs-on: ubuntu-latest + steps: + - name: Slack notify + run: | + curl -sf -X POST -H 'Content-Type: application/json' \ + -d "{\"text\":\":rotating_light: broker-sync off-infra build FAILED: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}\"}" \ + "${{ secrets.SLACK_WEBHOOK }}" || true diff --git a/.woodpecker/build.yml b/.woodpecker/build.yml deleted file mode 100644 index 423ea0c..0000000 --- a/.woodpecker/build.yml +++ /dev/null @@ -1,45 +0,0 @@ -when: - event: push - branch: [main, master] - -clone: - git: - image: woodpeckerci/plugin-git - settings: - attempts: 5 - backoff: 10s - -steps: - - name: lint-and-test - image: python:3.12-slim - commands: - - pip install --no-cache-dir "poetry==1.8.4" - - poetry install --no-interaction --no-root - - poetry run ruff check . - - poetry run mypy broker_sync tests - - poetry run pytest -q - - - name: build-and-push - image: woodpeckerci/plugin-docker-buildx - depends_on: - - lint-and-test - settings: - # Image name is `wealthfolio-sync` to match the deployment in - # infra/stacks/wealthfolio/main.tf (CronJob `wealthfolio-sync`). - # The repo is called `broker-sync` because the source covers - # multiple brokers (Trading 212, Schwab, Fidelity, IMAP-CSV) — - # we just happen to publish it under the wealthfolio name since - # that's the consumer stack. - repo: - - forgejo.viktorbarzin.me/viktor/wealthfolio-sync - logins: - - registry: forgejo.viktorbarzin.me - username: - from_secret: forgejo_user - password: - from_secret: forgejo_push_token - dockerfile: Dockerfile - context: . - auto_tag: true - platforms: - - linux/amd64