"""Parsers for Fidelity UK PlanViewer scraped data.

Two inputs:

- **Transactions HTML** from ``/planviewer/DisplayMyPlanMemberTransHist.action``
  rendered with a wide date range. The relevant ``<table>`` has
  ``id="myplan_member_transhist_support"``.
- **Valuation JSON** from the XHR ``/planviewer/DisplayValuation.action`` — the
  SPA calls this to render the my-investments dashboard. Contains current unit
  holdings + price + breakdown by contribution type.
"""

from __future__ import annotations

import hashlib
import re
from dataclasses import dataclass
from datetime import UTC, datetime
from decimal import Decimal
from typing import Any

from bs4 import BeautifulSoup

# Pound-sterling amount such as "£1,234.56"; group 1 is the unsigned digits.
_AMOUNT_RE = re.compile(r"\u00a3([\d,]+(?:\.\d+)?)")
# Leading DD/MM/YYYY date of a transaction row.
_DATE_RE = re.compile(r"(\d{2})/(\d{2})/(\d{4})")
# A transaction row is recognised by having exactly this many <td> cells.
_TX_COLUMN_COUNT = 7

# Fidelity transaction type strings we care about
_TX_DEPOSIT_TYPES = {
    "regular premium",
    "single premium",
    "investment management rebate",
}
_TX_IGNORE_TYPES = {
    "bulk switch",  # pure reallocation, no cash impact
    "fund switch",
}


@dataclass(frozen=True)
class FidelityCashTx:
    """A single cash-impacting transaction from the transaction history page."""

    date: datetime
    tx_type: str  # raw Fidelity label ("Regular Premium", "Single Premium", …)
    amount: Decimal
    external_id: str


@dataclass(frozen=True)
class FidelityHolding:
    """A current fund-unit holding from DisplayValuation.action."""

    fund_code: str
    fund_name: str
    units: Decimal
    unit_price: Decimal
    currency: str
    total_value: Decimal
    # Contribution-type breakdown ({"SASC": Decimal(...), "ERXS": Decimal(...)})
    units_by_source: dict[str, Decimal]


def _to_decimal(value: Any) -> Decimal:
    """Convert a JSON scalar to ``Decimal``, mapping missing/falsy values to 0."""
    # Going through str() avoids importing binary-float noise into the Decimal.
    return Decimal(str(value or 0))


def parse_transactions_html(html: str) -> list[FidelityCashTx]:
    """Extract cash-impacting transactions from the transaction history page.

    Skips bulk switches (no cash movement) and header/total rows.
    Deterministic external_id so re-runs dedup against the same rows.

    A row of ``table#myplan_member_transhist_support`` is kept only when it
    has exactly seven ``<td>`` cells, its first cell starts with a valid
    ``DD/MM/YYYY`` date, its type is neither ``"-"`` nor listed in
    ``_TX_IGNORE_TYPES``, and its last cell contains a non-zero ``£`` amount.
    Any row failing one of these checks is silently skipped.

    Args:
        html: Markup of the transaction history page.

    Returns:
        One ``FidelityCashTx`` per kept row, in document order. ``date`` is
        midnight UTC of the row's date; ``external_id`` is ``"fidelity:tx:"``
        followed by the first 16 hex digits of the SHA-256 of
        ``"<iso date>|<type>|<amount>"``.
    """
    soup = BeautifulSoup(html, "html.parser")
    transactions: list[FidelityCashTx] = []

    for row in soup.select("table#myplan_member_transhist_support tr"):
        cells = [td.get_text(" ", strip=True) for td in row.find_all("td")]
        if len(cells) != _TX_COLUMN_COUNT:
            continue
        # Only the first two cells and the last one are used; the middle
        # columns are ignored.
        date_str, tx_type, amount_str = cells[0], cells[1], cells[-1]

        date_match = _DATE_RE.match(date_str)
        if date_match is None:
            continue
        if tx_type == "-" or tx_type.lower() in _TX_IGNORE_TYPES:
            continue

        amount_match = _AMOUNT_RE.search(amount_str)
        if amount_match is None:
            continue
        # NOTE(review): the pattern captures the digits after "£" only, so a
        # sign written before the symbol would be dropped — confirm how the
        # page renders outgoing amounts.
        amount = Decimal(amount_match.group(1).replace(",", ""))
        if amount == 0:
            continue

        day, month, year = date_match.groups()
        try:
            when = datetime(int(year), int(month), int(day), tzinfo=UTC)
        except ValueError:
            # Date-shaped but impossible (e.g. "31/02/2024" or "00/00/0000"):
            # treat like any other malformed row instead of aborting the parse.
            continue

        # NOTE(review): two rows sharing date, type and amount produce the
        # same fingerprint — confirm that downstream dedup tolerates this.
        fingerprint = hashlib.sha256(
            f"{when.isoformat()}|{tx_type}|{amount}".encode()
        ).hexdigest()[:16]
        transactions.append(
            FidelityCashTx(
                date=when,
                tx_type=tx_type,
                amount=amount,
                external_id=f"fidelity:tx:{fingerprint}",
            )
        )

    return transactions


def parse_valuation_json(payload: Any) -> list[FidelityHolding]:
    """Extract current fund holdings from DisplayValuation.action JSON.

    Entries of ``payload["valuations"]`` without a ``FUND_CODE`` asset id are
    skipped. Keys that are missing or explicitly ``null`` fall back to
    defaults: numeric fields become ``Decimal(0)``, the currency becomes
    ``"GBP"`` and the fund name falls back to the fund code.

    Args:
        payload: Decoded JSON object returned by the endpoint.

    Returns:
        One ``FidelityHolding`` per valuation entry that carries a fund code,
        in payload order.
    """
    holdings: list[FidelityHolding] = []

    # ``or`` rather than a ``.get`` default throughout: a key that is present
    # with a JSON null would otherwise yield None and break iteration/lookup.
    for valuation in payload.get("valuations") or []:
        asset = valuation.get("asset") or {}
        fund_code = next(
            (
                ident.get("value")
                for ident in asset.get("assetId") or []
                if ident.get("type") == "FUND_CODE"
            ),
            None,
        )
        if not fund_code:
            continue

        units_info = valuation.get("units") or {}
        price = valuation.get("price") or {}
        valuation_info = valuation.get("valuation") or {}

        # NOTE(review): group entries are read via "unit" (singular) while the
        # parent key is "units" — confirm against a real payload.
        units_by_source = {
            group["groupId"]: _to_decimal((group.get("unit") or {}).get("total"))
            for group in units_info.get("group") or []
            if group.get("type") == "CONTRIBUTION_TYPE" and group.get("groupId")
        }

        holdings.append(
            FidelityHolding(
                fund_code=fund_code,
                fund_name=asset.get("name") or fund_code,
                units=_to_decimal(units_info.get("total")),
                unit_price=_to_decimal(price.get("value")),
                currency=price.get("currency") or "GBP",
                total_value=_to_decimal(valuation_info.get("total")),
                units_by_source=units_by_source,
            )
        )

    return holdings