broker-sync/broker_sync/dedup.py

81 lines
2.7 KiB
Python
Raw Normal View History

from __future__ import annotations

import sqlite3
from collections.abc import Iterable
from contextlib import closing
from datetime import UTC, datetime
from pathlib import Path

from broker_sync.models import Activity
# DDL for the dedup table. SyncRecordStore.__init__ runs this script on every
# construction; IF NOT EXISTS makes that repeatable. The composite primary key
# over (provider, account, external_id) is the identity used for dedup, and
# first_seen is stored as TEXT.
_SCHEMA = """
CREATE TABLE IF NOT EXISTS sync_record (
provider TEXT NOT NULL,
account TEXT NOT NULL,
external_id TEXT NOT NULL,
wealthfolio_activity_id TEXT,
first_seen TEXT NOT NULL,
PRIMARY KEY (provider, account, external_id)
);
"""
class SyncRecordStore:
"""Authoritative local dedup store.
Wealthfolio's `notes` field is user-editable, so we cannot rely on it
for dedup. This SQLite-backed store is the source of truth for whether
a (provider, account, external_id) tuple has been imported.
"""
def __init__(self, db_path: Path | str) -> None:
self._path = Path(db_path)
self._path.parent.mkdir(parents=True, exist_ok=True)
with self._conn() as c:
c.executescript(_SCHEMA)
def _conn(self) -> sqlite3.Connection:
c = sqlite3.connect(self._path)
c.row_factory = sqlite3.Row
return c
def has_seen(self, provider: str, account: str, external_id: str) -> bool:
with self._conn() as c:
row = c.execute(
"SELECT 1 FROM sync_record "
"WHERE provider=? AND account=? AND external_id=?",
(provider, account, external_id),
).fetchone()
return row is not None
def record(
self,
provider: str,
account: str,
external_id: str,
wealthfolio_activity_id: str | None,
first_seen: datetime | None = None,
) -> None:
ts = (first_seen or datetime.now(UTC)).isoformat()
with self._conn() as c:
c.execute(
"INSERT OR IGNORE INTO sync_record "
"(provider, account, external_id, wealthfolio_activity_id, first_seen) "
"VALUES (?, ?, ?, ?, ?)",
(provider, account, external_id, wealthfolio_activity_id, ts),
)
c.commit()
def get(self, provider: str, account: str, external_id: str) -> dict[str, str | None] | None:
with self._conn() as c:
row = c.execute(
"SELECT wealthfolio_activity_id, first_seen FROM sync_record "
"WHERE provider=? AND account=? AND external_id=?",
(provider, account, external_id),
).fetchone()
if row is None:
return None
return dict(row)
def filter_new(self, provider: str, activities: Iterable[Activity]) -> list[Activity]:
return [a for a in activities if not self.has_seen(provider, a.account_id, a.external_id)]