"""Parsers for Fidelity UK PlanViewer scraped data.
Two inputs:
- **Transactions HTML** from ``/planviewer/DisplayMyPlanMemberTransHist.action``
rendered with a wide date range. The relevant
has
``id="myplan_member_transhist_support"``.
- **Valuation JSON** from the XHR ``/planviewer/DisplayValuation.action`` —
the SPA calls this to render the my-investments dashboard. Contains
current unit holdings + price + breakdown by contribution type.
"""
from __future__ import annotations
import hashlib
import re
from dataclasses import dataclass
from datetime import UTC, datetime
from decimal import Decimal
from typing import Any
from bs4 import BeautifulSoup
_AMOUNT_RE = re.compile(r"\u00a3([\d,]+(?:\.\d+)?)")
# Fidelity transaction type strings we care about
_TX_DEPOSIT_TYPES = {
"regular premium",
"single premium",
"investment management rebate",
}
_TX_IGNORE_TYPES = {
"bulk switch", # pure reallocation, no cash impact
"fund switch",
}
@dataclass(frozen=True)
class FidelityCashTx:
"""A single cash-impacting transaction from the transaction history page."""
date: datetime
tx_type: str # raw Fidelity label ("Regular Premium", "Single Premium", …)
amount: Decimal
external_id: str
@dataclass(frozen=True)
class FidelityHolding:
"""A current fund-unit holding from DisplayValuation.action."""
fund_code: str
fund_name: str
units: Decimal
unit_price: Decimal
currency: str
total_value: Decimal
# Contribution-type breakdown ({"SASC": Decimal(...), "ERXS": Decimal(...)})
units_by_source: dict[str, Decimal]
def parse_transactions_html(html: str) -> list[FidelityCashTx]:
"""Extract cash-impacting transactions from the transaction history page.
Skips bulk switches (no cash movement) and header/total rows. Deterministic
external_id so re-runs dedup against the same rows.
"""
soup = BeautifulSoup(html, "html.parser")
out: list[FidelityCashTx] = []
for tr in soup.select("table#myplan_member_transhist_support tr"):
cells = [td.get_text(" ", strip=True) for td in tr.find_all("td")]
if len(cells) != 7:
continue
date_str, tx_type, _f, _c, _u, _p, amount_str = cells
m_date = re.match(r"(\d{2})/(\d{2})/(\d{4})", date_str)
if not m_date:
continue
tx_lower = tx_type.lower()
if tx_lower in _TX_IGNORE_TYPES or tx_type in ("-",):
continue
m_amt = _AMOUNT_RE.search(amount_str)
if not m_amt:
continue
amount = Decimal(m_amt.group(1).replace(",", ""))
if amount == 0:
continue
dd, mm, yyyy = m_date.groups()
dt = datetime(int(yyyy), int(mm), int(dd), tzinfo=UTC)
fp = hashlib.sha256(
f"{dt.isoformat()}|{tx_type}|{amount}".encode()
).hexdigest()[:16]
out.append(FidelityCashTx(
date=dt,
tx_type=tx_type,
amount=amount,
external_id=f"fidelity:tx:{fp}",
))
return out
def parse_valuation_json(payload: Any) -> list[FidelityHolding]:
"""Extract current fund holdings from DisplayValuation.action JSON."""
out: list[FidelityHolding] = []
for v in payload.get("valuations", []):
asset = v.get("asset") or {}
fund_code = next(
(a.get("value") for a in asset.get("assetId", []) if a.get("type") == "FUND_CODE"),
None,
)
if not fund_code:
continue
fund_name = asset.get("name") or fund_code
units = Decimal(str((v.get("units") or {}).get("total") or 0))
price = (v.get("price") or {})
unit_price = Decimal(str(price.get("value") or 0))
currency = price.get("currency") or "GBP"
total = Decimal(str((v.get("valuation") or {}).get("total") or 0))
groups = (v.get("units") or {}).get("group", []) or []
by_src = {}
for g in groups:
if g.get("type") == "CONTRIBUTION_TYPE" and g.get("groupId"):
by_src[g["groupId"]] = Decimal(str(g.get("unit", {}).get("total") or 0))
out.append(FidelityHolding(
fund_code=fund_code,
fund_name=fund_name,
units=units,
unit_price=unit_price,
currency=currency,
total_value=total,
units_by_source=by_src,
))
return out