130 lines
4.3 KiB
Python
130 lines
4.3 KiB
Python
|
|
"""Parsers for Fidelity UK PlanViewer scraped data.
|
||
|
|
|
||
|
|
Two inputs:

- **Transactions HTML** from ``/planviewer/DisplayMyPlanMemberTransHist.action``
  rendered with a wide date range. The relevant <table> has
  ``id="myplan_member_transhist_support"``.
- **Valuation JSON** from the XHR ``/planviewer/DisplayValuation.action`` —
  the SPA calls this to render the my-investments dashboard. Contains
  current unit holdings + price + breakdown by contribution type.
"""
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import hashlib
|
||
|
|
import re
|
||
|
|
from dataclasses import dataclass
|
||
|
|
from datetime import UTC, datetime
|
||
|
|
from decimal import Decimal
|
||
|
|
from typing import Any
|
||
|
|
|
||
|
|
from bs4 import BeautifulSoup
|
||
|
|
|
||
|
|
# Matches a pound-sterling figure such as "£1,234.56": a literal "£" (U+00A3)
# followed by digits/commas and an optional decimal part. Group 1 is the
# numeric text with any thousands separators still in place; a sign, if one is
# present in the surrounding text, is not part of the capture.
_AMOUNT_RE = re.compile(r"\u00a3([\d,]+(?:\.\d+)?)")

# Fidelity transaction type strings we care about (stored lower-case).
# NOTE(review): not referenced by the parsers visible here — presumably used by
# callers to classify deposits; confirm before removing.
_TX_DEPOSIT_TYPES = {
    "regular premium",
    "single premium",
    "investment management rebate",
}
# Lower-case transaction labels that parse_transactions_html drops outright;
# compared against the lower-cased transaction-type cell.
_TX_IGNORE_TYPES = {
    "bulk switch",  # pure reallocation, no cash impact
    "fund switch",
}
|
||
|
|
|
||
|
|
|
||
|
|
@dataclass(frozen=True)
class FidelityCashTx:
    """One cash-moving row taken from the transaction history table.

    Instances are immutable; fields are listed in constructor order.
    """

    # Transaction date (parse_transactions_html supplies midnight UTC).
    date: datetime
    # Raw Fidelity label, original casing ("Regular Premium", "Single Premium", …).
    tx_type: str
    # Monetary amount of the row.
    amount: Decimal
    # "fidelity:tx:"-prefixed identifier used to de-duplicate across runs.
    external_id: str
|
||
|
|
|
||
|
|
|
||
|
|
@dataclass(frozen=True)
class FidelityHolding:
    """Snapshot of one fund position as reported by DisplayValuation.action.

    Instances are immutable; fields are listed in constructor order. Because
    ``units_by_source`` is a dict, calling ``hash()`` on an instance raises
    ``TypeError`` even though the dataclass is frozen.
    """

    # Fund identifier and display name.
    fund_code: str
    fund_name: str
    # Units held and the price of a single unit.
    units: Decimal
    unit_price: Decimal
    # Currency code that accompanies the unit price.
    currency: str
    # Reported value of the whole position.
    total_value: Decimal
    # Units split by contribution type,
    # e.g. {"SASC": Decimal(...), "ERXS": Decimal(...)}.
    units_by_source: dict[str, Decimal]
|
||
|
|
|
||
|
|
|
||
|
|
# Leading ``DD/MM/YYYY`` in the date cell; trailing text is tolerated.
_DATE_RE = re.compile(r"(\d{2})/(\d{2})/(\d{4})")


def _parse_uk_date(text: str) -> datetime | None:
    """Return midnight UTC for a leading ``DD/MM/YYYY`` in *text*, else ``None``."""
    found = _DATE_RE.match(text)
    if found is None:
        return None
    day, month, year = (int(part) for part in found.groups())
    try:
        return datetime(year, month, day, tzinfo=UTC)
    except ValueError:
        # Right shape but not a real calendar date (e.g. 31/02/2024 or year 0000).
        return None


def parse_transactions_html(html: str) -> list[FidelityCashTx]:
    """Extract cash-impacting transactions from the transaction history page.

    Only ``<tr>`` elements inside ``table#myplan_member_transhist_support``
    that hold exactly seven ``<td>`` cells are considered; the first cell is
    the date, the second the transaction type and the last the amount.

    A row is skipped when any of the following holds:

    - the date cell does not start with a valid ``DD/MM/YYYY`` date
      (header/total rows, or a malformed date);
    - the lower-cased type is in ``_TX_IGNORE_TYPES`` (switches move no cash)
      or the type cell is ``"-"``;
    - the amount cell contains no ``£`` figure, or the figure is zero.

    ``external_id`` is deterministic so re-runs dedup against the same rows:
    ``"fidelity:tx:"`` plus the first 16 hex digits of the SHA-256 of
    ``"<iso date>|<type>|<amount>"``. When several rows of one page share
    that fingerprint (same date, type and amount), the first keeps the plain
    id and later ones get ``":2"``, ``":3"``, … appended in page order, so
    distinct rows no longer collapse into one id.

    Args:
        html: Markup of the transaction history page.

    Returns:
        One ``FidelityCashTx`` per retained row, in document order.
    """
    soup = BeautifulSoup(html, "html.parser")
    out: list[FidelityCashTx] = []
    # Occurrences seen so far per base external_id, to disambiguate twins.
    occurrences: dict[str, int] = {}
    for tr in soup.select("table#myplan_member_transhist_support tr"):
        cells = [td.get_text(" ", strip=True) for td in tr.find_all("td")]
        if len(cells) != 7:
            continue
        # The four middle columns are not used here.
        date_str, tx_type, _f, _c, _u, _p, amount_str = cells
        dt = _parse_uk_date(date_str)
        if dt is None:
            continue
        if tx_type.lower() in _TX_IGNORE_TYPES or tx_type == "-":
            continue
        # NOTE(review): only the digits after "£" are captured, so a leading
        # minus sign (if the page ever shows one) is dropped — confirm.
        m_amt = _AMOUNT_RE.search(amount_str)
        if not m_amt:
            continue
        amount = Decimal(m_amt.group(1).replace(",", ""))
        if amount == 0:
            continue
        fp = hashlib.sha256(
            f"{dt.isoformat()}|{tx_type}|{amount}".encode()
        ).hexdigest()[:16]
        base_id = f"fidelity:tx:{fp}"
        nth = occurrences.get(base_id, 0) + 1
        occurrences[base_id] = nth
        out.append(FidelityCashTx(
            date=dt,
            tx_type=tx_type,
            amount=amount,
            # First occurrence keeps the historical id format unchanged.
            external_id=base_id if nth == 1 else f"{base_id}:{nth}",
        ))
    return out
|
||
|
|
|
||
|
|
|
||
|
|
def _to_decimal(value: Any) -> Decimal:
    """Convert a JSON scalar to ``Decimal`` via ``str``; missing/falsy becomes 0."""
    return Decimal(str(value or 0))


def parse_valuation_json(payload: Any) -> list[FidelityHolding]:
    """Extract current fund holdings from DisplayValuation.action JSON.

    Each entry of ``payload["valuations"]`` becomes one ``FidelityHolding``.
    Entries without a non-empty ``FUND_CODE`` item in ``asset.assetId`` are
    skipped. Missing or ``null`` members are tolerated at every level: numeric
    fields fall back to 0, the fund name falls back to the fund code and the
    currency falls back to ``"GBP"``.

    Args:
        payload: Decoded JSON object (a mapping) returned by the endpoint.

    Returns:
        Holdings in the order they appear in ``payload["valuations"]``.
    """
    out: list[FidelityHolding] = []
    # ``or []`` / ``or {}`` rather than ``.get(key, default)``: the default of
    # ``.get`` is only used when the key is absent, not when it is ``null``.
    for v in payload.get("valuations") or []:
        asset = v.get("asset") or {}
        fund_code = next(
            (
                a.get("value")
                for a in asset.get("assetId") or []
                if a.get("type") == "FUND_CODE"
            ),
            None,
        )
        if not fund_code:
            continue
        units_info = v.get("units") or {}
        price = v.get("price") or {}
        # Units per contribution type; a repeated groupId keeps its last entry.
        # NOTE(review): groups are read from "unit" (singular) while the
        # holding total is read from "units" — presumably that is the real
        # schema; confirm.
        by_src: dict[str, Decimal] = {
            g["groupId"]: _to_decimal((g.get("unit") or {}).get("total"))
            for g in units_info.get("group") or []
            if g.get("type") == "CONTRIBUTION_TYPE" and g.get("groupId")
        }
        out.append(FidelityHolding(
            fund_code=fund_code,
            fund_name=asset.get("name") or fund_code,
            units=_to_decimal(units_info.get("total")),
            unit_price=_to_decimal(price.get("value")),
            currency=price.get("currency") or "GBP",
            total_value=_to_decimal((v.get("valuation") or {}).get("total")),
            units_by_source=by_src,
        ))
    return out
|