## Context
Prior commit 832732a scaffolded the provider with a stub fetch() that
raised FidelityProviderConfigError. This commit replaces the stub with
the end-to-end ingest flow, validated against the real PlanViewer site
during a live login session on 2026-04-18.
Fidelity UK PlanViewer mixes a legacy Struts2 HTML app
(www.planviewer.fidelity.co.uk) with a React SPA at
pv.planviewer.fidelity.co.uk. Authentication is PingFederate OAuth2 at
id.fidelity.co.uk — password + memorable word + SMS OTP, with a
remember-device cookie that keeps the session alive for weeks. The
transaction history is server-rendered HTML at DisplayMyPlanMemberTransHist.action;
current fund holdings come from the DisplayValuation.action JSON XHR.
Both live behind the same cookie jar, so one Playwright session (seeded
interactively once, kept alive via storage_state) can scrape both.
## This change
- broker_sync/providers/parsers/fidelity.py (NEW)
  - parse_transactions_html: extracts cash-impacting rows from the
    #myplan_member_transhist_support table, skips Bulk Switches (no cash
    movement), emits FidelityCashTx with deterministic external_id for
    dedup.
  - parse_valuation_json: lifts fund code + name + units + price +
    contribution-type breakdown from the JSON payload.
- broker_sync/providers/fidelity_planviewer.py (REWRITTEN)
  - FidelityPlanViewerProvider.fetch() now loads storage_state, boots
    headless Chromium, navigates landing → main page (to hydrate the
    SPA session + capture DisplayValuation XHR) → transactions page
    with a wide 01 Jan 1990 → today window. Raises FidelitySessionError
    if PlanViewer shows the 15-min idle page or redirects back to
    id.fidelity.co.uk.
  - _gains_offset_activity emits a synthetic DEPOSIT/WITHDRAWAL with a
    date-keyed external_id so WF Net Worth reconciles to the
    Fidelity-reported pot value without stacking duplicates across
    monthly runs.
  - Rolls storage_state back to disk after each run, extending session
    TTL.
- tests/providers/test_fidelity_planviewer.py (EXTENDED)
  - 8 tests against a real captured fixture: account shape, guard on
    missing storage_state, full-fixture round-trip (51 txs summing to
    £102,004.15), Bulk Switch filtered, deterministic external_id,
    valuation parse with fund-code resolution, gains-offset direction
    + skip-when-empty.
- tests/fixtures/fidelity/transactions-full.html + valuation.json (NEW)
  - Sanitised captures from the 2026-04-18 live session.
## What is NOT in this change
- CronJob + Vault secret wiring + Prometheus alert in
infra/stacks/broker-sync/main.tf — next commit.
- Dockerfile Chromium install — next commit.
- The scrape-and-import was already done manually (51 activities +
1 gains offset imported into WF account a7d6208d); this commit
productionises the code path so the monthly cron can do the same.
## Verification
### Automated
$ poetry run pytest tests/providers/test_fidelity_planviewer.py -v
8 passed in 0.88s
$ poetry run pytest -q
128 passed, 1 skipped in 1.41s
$ poetry run mypy broker_sync/providers/fidelity_planviewer.py broker_sync/providers/parsers/fidelity.py
Success: no issues found in 2 source files
$ poetry run ruff check broker_sync/providers/fidelity_planviewer.py broker_sync/providers/parsers/fidelity.py
All checks passed!
### Manual verification (2026-04-18 live run)
1. poetry run broker-sync fidelity-seed (headed browser + SMS OTP) —
captured storage_state, staged to Vault.
2. Inline import script hit the same code paths the provider now runs;
52 activities imported into a new WF WORKPLACE_PENSION account, WF
Net Worth jumped from £865,358 → £1,003,083.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
129 lines
4.3 KiB
Python
129 lines
4.3 KiB
Python
"""Parsers for Fidelity UK PlanViewer scraped data.

Two inputs:

- **Transactions HTML** from ``/planviewer/DisplayMyPlanMemberTransHist.action``
  rendered with a wide date range. The relevant <table> has
  ``id="myplan_member_transhist_support"``.
- **Valuation JSON** from the XHR ``/planviewer/DisplayValuation.action`` —
  the SPA calls this to render the my-investments dashboard. Contains
  current unit holdings + price + breakdown by contribution type.
"""
|
|
from __future__ import annotations
|
|
|
|
import hashlib
|
|
import re
|
|
from dataclasses import dataclass
|
|
from datetime import UTC, datetime
|
|
from decimal import Decimal
|
|
from typing import Any
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
# Captures the numeric part that follows a pound sign, e.g. "£1,234.56" ->
# group(1) == "1,234.56". Thousands separators are kept in the capture and
# must be stripped by the caller before numeric conversion.
_AMOUNT_RE = re.compile(r"\u00a3([\d,]+(?:\.\d+)?)")

# Lower-cased Fidelity transaction-type labels classified as deposits.
_TX_DEPOSIT_TYPES = {
    "investment management rebate",
    "regular premium",
    "single premium",
}

# Lower-cased labels for switches between funds. These are pure
# reallocations with no cash impact, so the transaction parser skips them.
_TX_IGNORE_TYPES = {
    "bulk switch",
    "fund switch",
}
|
|
|
|
|
|
@dataclass(frozen=True)
class FidelityCashTx:
    """One cash-moving row taken from the transaction history page.

    Instances are immutable (``frozen=True``).
    """

    # Transaction date.
    date: datetime
    # Fidelity's own label, unmodified ("Regular Premium", "Single Premium", …).
    tx_type: str
    # Monetary amount of the transaction.
    amount: Decimal
    # Identifier used to de-duplicate the same row across repeated imports.
    external_id: str
|
|
|
|
|
|
@dataclass(frozen=True)
class FidelityHolding:
    """Snapshot of one fund position as reported by DisplayValuation.action.

    Instances are immutable (``frozen=True``).
    """

    # Fund identifier and display name.
    fund_code: str
    fund_name: str
    # Number of units held and the price of one unit.
    units: Decimal
    unit_price: Decimal
    # Currency of the quoted price.
    currency: str
    # Total value of the position.
    total_value: Decimal
    # Units split by contribution type,
    # e.g. {"SASC": Decimal(...), "ERXS": Decimal(...)}.
    units_by_source: dict[str, Decimal]
|
|
|
|
|
|
def parse_transactions_html(html: str) -> list[FidelityCashTx]:
    """Extract cash-impacting transactions from the transaction history page.

    Only ``<tr>`` elements inside ``table#myplan_member_transhist_support``
    are inspected. A row is kept when it has exactly seven ``<td>`` cells,
    its first cell starts with a ``dd/mm/yyyy`` date, its type is neither a
    switch (``_TX_IGNORE_TYPES``) nor the ``-`` placeholder, and its last
    cell contains a non-zero ``£`` amount. Everything else (header rows,
    total rows, switches) is skipped.

    ``external_id`` is ``fidelity:tx:`` plus the first 16 hex digits of a
    SHA-256 over date, raw type label and amount, so re-parsing the same page
    yields the same ids. When several rows share the same date, type and
    amount, the first keeps that plain fingerprint and each later one has its
    occurrence number mixed into the hash; otherwise distinct transactions
    would collapse into a single id and be dropped by id-based dedup.

    Args:
        html: Full HTML of the transaction history page.

    Returns:
        The retained transactions, in document order.

    Raises:
        ValueError: If a date cell matches ``dd/mm/yyyy`` but is not a valid
            calendar date.
    """
    date_re = re.compile(r"(\d{2})/(\d{2})/(\d{4})")
    soup = BeautifulSoup(html, "html.parser")
    out: list[FidelityCashTx] = []
    # How many rows with a given (date, type, amount) key were seen so far.
    seen: dict[str, int] = {}
    for tr in soup.select("table#myplan_member_transhist_support tr"):
        cells = [td.get_text(" ", strip=True) for td in tr.find_all("td")]
        if len(cells) != 7:
            continue
        # Only the first two cells and the last one are used here.
        date_str, tx_type, *_, amount_str = cells
        m_date = date_re.match(date_str)
        if not m_date:
            continue
        if tx_type == "-" or tx_type.lower() in _TX_IGNORE_TYPES:
            continue
        # NOTE(review): the pattern captures only the digits after "£", so a
        # sign written in the cell is not reflected in the amount — confirm
        # whether outflows can appear in this table.
        m_amt = _AMOUNT_RE.search(amount_str)
        if not m_amt:
            continue
        digits = m_amt.group(1).replace(",", "")
        if not digits:
            # The capture was separators only (e.g. "£,"); treat as no amount.
            continue
        amount = Decimal(digits)
        if amount == 0:
            continue
        dd, mm, yyyy = m_date.groups()
        dt = datetime(int(yyyy), int(mm), int(dd), tzinfo=UTC)
        key = f"{dt.isoformat()}|{tx_type}|{amount}"
        occurrence = seen.get(key, 0)
        seen[key] = occurrence + 1
        if occurrence:
            # Repeat of an identical row: disambiguate while leaving the
            # first occurrence's id exactly as it has always been.
            key = f"{key}|{occurrence}"
        fp = hashlib.sha256(key.encode()).hexdigest()[:16]
        out.append(
            FidelityCashTx(
                date=dt,
                tx_type=tx_type,
                amount=amount,
                external_id=f"fidelity:tx:{fp}",
            )
        )
    return out
|
|
|
|
|
|
def _as_decimal(raw: Any) -> Decimal:
    """Convert a JSON number or numeric string to ``Decimal``; falsy values become 0."""
    return Decimal(str(raw or 0))


def parse_valuation_json(payload: Any) -> list[FidelityHolding]:
    """Extract current fund holdings from DisplayValuation.action JSON.

    Each entry of ``payload["valuations"]`` becomes one ``FidelityHolding``.
    Entries without a non-empty ``FUND_CODE`` item in ``asset.assetId`` are
    skipped. Missing or ``null`` nested fields fall back to defaults: numeric
    values to ``0``, the fund name to the fund code, the currency to
    ``"GBP"``, and the per-source breakdown to an empty dict.

    Args:
        payload: Decoded JSON object returned by the valuation XHR.

    Returns:
        One holding per valuation entry that carries a fund code, in payload
        order.
    """
    out: list[FidelityHolding] = []
    # ``or`` rather than a ``.get`` default: a key that is present but null
    # must be handled the same way as a key that is absent.
    for v in payload.get("valuations") or []:
        asset = v.get("asset") or {}
        fund_code = next(
            (
                a.get("value")
                for a in asset.get("assetId") or []
                if a.get("type") == "FUND_CODE"
            ),
            None,
        )
        if not fund_code:
            continue
        units_node = v.get("units") or {}
        price = v.get("price") or {}
        by_src: dict[str, Decimal] = {}
        for g in units_node.get("group") or []:
            if g.get("type") == "CONTRIBUTION_TYPE" and g.get("groupId"):
                # NOTE(review): the per-group key is "unit" (singular) while
                # the fund-level key is "units" — confirm against a captured
                # payload.
                by_src[g["groupId"]] = _as_decimal((g.get("unit") or {}).get("total"))
        out.append(
            FidelityHolding(
                fund_code=fund_code,
                fund_name=asset.get("name") or fund_code,
                units=_as_decimal(units_node.get("total")),
                unit_price=_as_decimal(price.get("value")),
                currency=price.get("currency") or "GBP",
                total_value=_as_decimal((v.get("valuation") or {}).get("total")),
                units_by_source=by_src,
            )
        )
    return out
|