Add SyncRecordStore for authoritative dedup

Context
-------
Wealthfolio's activity `notes` field is user-editable via the UI, so
using it as the dedup key would let a single note-edit in Wealthfolio
cause the next sync to create a duplicate. Stress-testing the plan
flagged this as the top structural risk.

This change
-----------
- SQLite-backed store at `/data/broker_sync.db` in production, keyed on
  (provider, account, external_id) so that the same external id seen
  under a different provider or a different account is treated as a
  distinct record.
- `INSERT OR IGNORE` makes record() idempotent — a second call with the
  same key is a no-op and preserves both the original
  wealthfolio_activity_id and the first_seen timestamp.
- `filter_new()` is the integration point: provider fetches activities,
  hands them to the store, gets back only the unseen subset to submit
  to the Wealthfolio sink.
- Wealthfolio activity id returned by the API is persisted alongside
  each record so the HMRC FX reconciliation job can later PATCH the
  original activity rather than creating a new one.

Test plan
---------
## Automated
- poetry run pytest tests/test_dedup.py -v  →  6 passed
- poetry run mypy broker_sync tests  →  Success: no issues found in 6 source files
- poetry run ruff check .  →  All checks passed!

## Manual Verification
Not applicable for this layer — full end-to-end verification happens
once a provider + sink land (Phase 1 Trading212 and the auth spike).
This commit is contained in:
Viktor Barzin 2026-04-17 19:17:12 +00:00
parent a2aa7ec486
commit a66ef189f6
3 changed files with 149 additions and 3 deletions

67
tests/test_dedup.py Normal file
View file

@ -0,0 +1,67 @@
from datetime import UTC, datetime
from decimal import Decimal
from pathlib import Path
from broker_sync.dedup import SyncRecordStore
from broker_sync.models import AccountType, Activity, ActivityType
def _buy(external_id: str) -> Activity:
    """Return a fixed BUY activity that differs only by ``external_id``.

    Every other field is constant (one share of VUAG at 100 GBP in the
    ``t212-isa`` ISA account, dated 2026-01-01 UTC) so tests can focus on
    how the store treats the external id.
    """
    trade_date = datetime(2026, 1, 1, tzinfo=UTC)
    activity = Activity(
        external_id=external_id,
        account_id="t212-isa",
        account_type=AccountType.ISA,
        date=trade_date,
        activity_type=ActivityType.BUY,
        symbol="VUAG",
        quantity=Decimal("1"),
        unit_price=Decimal("100"),
        currency="GBP",
    )
    return activity
def test_store_schema_is_idempotent(tmp_path: Path) -> None:
    """Opening the store twice on the same file must not raise."""
    db_file = tmp_path / "s.db"
    # First pass is expected to create the schema; the second pass re-opens
    # the already-initialised file and must succeed just the same.
    for _ in range(2):
        SyncRecordStore(db_file)
    assert db_file.exists()
def test_has_seen_returns_false_for_new(tmp_path: Path) -> None:
    """A key that was never recorded is reported as unseen by a fresh store."""
    store = SyncRecordStore(tmp_path / "s.db")
    seen = store.has_seen("trading212", "t212-isa", "order-1")
    assert seen is False
def test_record_then_has_seen(tmp_path: Path) -> None:
    """A recorded key is seen by the writing store and by a second store on the same file.

    The second check opens a separate ``SyncRecordStore`` on the same
    database path, standing in for a later sync run that must still see
    what an earlier run recorded.
    """
    db_path = tmp_path / "s.db"
    writer = SyncRecordStore(db_path)
    writer.record("trading212", "t212-isa", "order-1", wealthfolio_activity_id="wf-42")
    assert writer.has_seen("trading212", "t212-isa", "order-1") is True
    # Same (provider, account, external_id) from a different caller is still
    # seen: query through an independent store instance rather than repeating
    # the identical call on the writer.
    reader = SyncRecordStore(db_path)
    assert reader.has_seen("trading212", "t212-isa", "order-1") is True
def test_record_is_idempotent(tmp_path: Path) -> None:
    """Recording the same key twice keeps the first Wealthfolio activity id."""
    store = SyncRecordStore(tmp_path / "s.db")
    key = ("trading212", "t212-isa", "order-1")
    # The second record() uses a different activity id; it must neither raise
    # nor overwrite what the first call stored.
    for wf_id in ("wf-42", "wf-43"):
        store.record(*key, wealthfolio_activity_id=wf_id)
    row = store.get(*key)
    assert row is not None
    assert row["wealthfolio_activity_id"] == "wf-42"
def test_scope_per_provider_and_account(tmp_path: Path) -> None:
    """An external id recorded for one provider/account is unseen under any other."""
    store = SyncRecordStore(tmp_path / "s.db")
    store.record("trading212", "t212-isa", "order-1", wealthfolio_activity_id="wf-1")

    # Same account and external id, but a different provider.
    seen_other_provider = store.has_seen("invest-engine", "t212-isa", "order-1")
    assert seen_other_provider is False

    # Same provider and external id, but a different account.
    seen_other_account = store.has_seen("trading212", "t212-invest", "order-1")
    assert seen_other_account is False
def test_filter_new_drops_seen(tmp_path: Path) -> None:
    """filter_new() returns only the activities not yet recorded, in input order."""
    store = SyncRecordStore(tmp_path / "s.db")
    # Pre-record "a" so that only "b" and "c" remain unseen.
    store.record("trading212", "t212-isa", "a", wealthfolio_activity_id=None)
    batch = [_buy(ext_id) for ext_id in ("a", "b", "c")]
    unseen = store.filter_new("trading212", batch)
    unseen_ids = [activity.external_id for activity in unseen]
    assert unseen_ids == ["b", "c"]