payslip-ingest/payslip_ingest/sync/actualbudget.py
Viktor Barzin 08f28ad581 sync: ActualBudget Meta deposit overlay (Phase C)
Adds daily sync of Meta payroll deposits from ActualBudget into
payslip_ingest.external_meta_deposits, enabling the dashboard to overlay
bank deposits against payslip net_pay and surface parser drift on net.

- Migration 0007: new table external_meta_deposits, unique on
  actualbudget_tx_id, indexed on deposit_date.
- payslip_ingest.sync.actualbudget: narrow client for the
  jhonderson/actual-http-api sidecar (list accounts + transactions).
  Filters on payee regex (META|FACEBOOK, word-boundary). Idempotent
  upsert — ON CONFLICT DO NOTHING on actualbudget_tx_id. Surfaces
  clear error if the transactions endpoint is missing so the operator
  can switch to a SQLite-mount fallback.
- CLI command: `python -m payslip_ingest sync-meta-deposits` driven by
  4 env vars (ACTUALBUDGET_HTTP_API_URL, API_KEY, ENCRYPTION_PASSWORD,
  BUDGET_SYNC_ID).
- Tests: 5 — regex positive/negative, full sync insert, idempotency,
  404-endpoint failure mode.

Part of: code-860
2026-04-19 18:20:50 +00:00

206 lines
6.8 KiB
Python

"""ActualBudget HTTP API client — pull Meta payroll deposits.
Reads from the jhonderson/actual-http-api sidecar in the actualbudget
namespace. Looks up accounts on the given budget, enumerates all
transactions across them, keeps only transactions whose payee name
matches a Meta pattern (META, FACEBOOK, META PLATFORMS etc.).
Idempotent: each sync run upserts on `actualbudget_tx_id`; existing rows
are untouched. Deletions in ActualBudget are NOT propagated.
"""
from __future__ import annotations
import logging
import re
from dataclasses import dataclass
from datetime import date, datetime
from decimal import Decimal
from typing import Any
import httpx
from sqlalchemy import select
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.dialects.sqlite import insert as sqlite_insert
from sqlalchemy.ext.asyncio import async_sessionmaker
from payslip_ingest.db import ExternalMetaDeposit
# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)
# Payee pattern. ActualBudget normalizes payee strings but the raw bank
# description can include country code / spacing variants. Match the
# common forms observed in the viktor budget.
#
# The pattern matches META or FACEBOOK as a whole word, case-insensitively,
# anywhere in the string: "Meta Platforms Ireland" and "FACEBOOK UK LTD"
# match, "METAL SUPPLIES" does not. Underscore counts as a word character,
# so "META_PAYROLL" does not match either.
META_PAYEE_RE = re.compile(r"\b(META|FACEBOOK)\b", re.IGNORECASE)
class ActualBudgetError(RuntimeError):
    """Raised when the ActualBudget HTTP API answers in a way this module cannot use.

    Used for a missing transaction-list endpoint (HTTP 404) and for
    responses whose ``data`` member is not a list.
    """
@dataclass
class SyncResult:
    """Counters describing the outcome of one ``sync_meta_deposits`` run."""

    # Accounts returned by the accounts listing (including any that are
    # later skipped because they carry no string id).
    accounts_scanned: int = 0
    # Total transactions returned across all scanned accounts.
    transactions_fetched: int = 0
    # Transactions that passed the Meta-deposit filter.
    meta_deposits_matched: int = 0
    # Matched deposits that were written as new rows.
    inserted: int = 0
    # Matched deposits that were already present and left untouched.
    skipped_existing: int = 0
class ActualBudgetClient:
    """Narrow client for the jhonderson/actual-http-api endpoints we need.

    Wraps an ``httpx.AsyncClient``. When no client is supplied, one is
    created with a 60-second timeout and closed by :meth:`aclose` (or on
    leaving an ``async with`` block). A caller-supplied client is never
    closed here — its owner stays responsible for it.
    """

    def __init__(
        self,
        base_url: str,
        api_key: str,
        encryption_password: str,
        budget_sync_id: str,
        client: httpx.AsyncClient | None = None,
    ):
        """Store connection settings and pick the HTTP client to use.

        Args:
            base_url: Root URL of the http-api sidecar; trailing slashes
                are stripped.
            api_key: Sent as the ``x-api-key`` header.
            encryption_password: Sent as the ``budget-encryption-password``
                header.
            budget_sync_id: Budget identifier placed in every request path.
            client: Optional pre-built HTTP client to reuse.
        """
        # Strip trailing slashes so the f-string joins never yield "//".
        self._base_url = base_url.rstrip("/")
        self._budget = budget_sync_id
        self._headers = {
            "accept": "application/json",
            "x-api-key": api_key,
            "budget-encryption-password": encryption_password,
        }
        # Decide ownership and the client with the same `is None` test so
        # the two can never disagree.
        self._owns_client = client is None
        self._client = httpx.AsyncClient(timeout=60.0) if client is None else client

    async def aclose(self) -> None:
        """Close the underlying HTTP client, but only if this object created it."""
        if self._owns_client:
            await self._client.aclose()

    async def __aenter__(self) -> ActualBudgetClient:
        return self

    async def __aexit__(self, *exc: object) -> None:
        await self.aclose()

    async def _get_data_list(
        self,
        path: str,
        what: str,
        not_found_message: str | None = None,
    ) -> list[dict[str, Any]]:
        """GET a budget-scoped path and return the ``data`` list from its JSON body.

        Args:
            path: Path below ``/v1/budgets/<budget>``, starting with "/".
            what: Noun used in error messages ("accounts", "transactions").
            not_found_message: When given, an HTTP 404 raises
                ``ActualBudgetError`` with this text instead of falling
                through to ``raise_for_status``.

        Raises:
            ActualBudgetError: On a 404 (if ``not_found_message`` is set),
                when the JSON body is not an object, or when its ``data``
                member is not a list.
        """
        resp = await self._client.get(
            f"{self._base_url}/v1/budgets/{self._budget}{path}",
            headers=self._headers,
        )
        if not_found_message is not None and resp.status_code == 404:
            raise ActualBudgetError(not_found_message)
        resp.raise_for_status()
        payload = resp.json()
        # A non-object body (list, string, null) has no `.get`; report it as
        # an API error rather than leaking an AttributeError.
        if not isinstance(payload, dict):
            raise ActualBudgetError(f"{what} response not an object: {payload!r}")
        data = payload.get("data", [])
        if not isinstance(data, list):
            raise ActualBudgetError(f"{what} response not a list: {data!r}")
        return data

    async def list_accounts(self) -> list[dict[str, Any]]:
        """List the accounts of the configured budget.

        Returns the ``data`` list of the response (empty if the key is absent).

        Raises:
            ActualBudgetError: If the response body has an unexpected shape.
        """
        return await self._get_data_list("/accounts", "accounts")

    async def list_transactions(self, account_id: str) -> list[dict[str, Any]]:
        """List all transactions for an account.

        jhonderson/actual-http-api GET endpoint may return `data` as a list.
        If the endpoint is missing (older image), surface a clear error so
        the operator can switch to the SQLite-mount fallback.

        Raises:
            ActualBudgetError: On HTTP 404 or an unexpectedly shaped body.
        """
        return await self._get_data_list(
            f"/accounts/{account_id}/transactions",
            "transactions",
            not_found_message=(
                "transaction-list endpoint not found — the http-api image may be too old; "
                "fall back to reading SQLite directly"),
        )
async def sync_meta_deposits(
    client: ActualBudgetClient,
    db_session_factory: async_sessionmaker[Any],
) -> SyncResult:
    """Walk every account's transactions and store the Meta deposits among them.

    Accounts whose ``id`` is not a string are logged and skipped. Each
    transaction accepted by ``_is_meta_deposit`` is handed to ``_upsert``;
    the returned ``SyncResult`` tallies how many rows were new versus
    already present.
    """
    account_list = await client.list_accounts()
    summary = SyncResult(accounts_scanned=len(account_list))

    for acct in account_list:
        acct_id = acct.get("id")
        if not isinstance(acct_id, str):
            log.warning("skipping account without id: %r", acct)
            continue

        transactions = await client.list_transactions(acct_id)
        summary.transactions_fetched += len(transactions)

        # filter() is lazy, so the predicate still runs one transaction at a
        # time, interleaved with the writes.
        for deposit in filter(_is_meta_deposit, transactions):
            summary.meta_deposits_matched += 1
            if await _upsert(db_session_factory, deposit):
                summary.inserted += 1
            else:
                summary.skipped_existing += 1

    return summary
def _is_meta_deposit(tx: dict[str, Any]) -> bool:
"""Positive deposit (credit) where payee contains META / FACEBOOK."""
amount = tx.get("amount")
if not isinstance(amount, int | float):
return False
# ActualBudget stores amounts in cents (int); positive = incoming.
if amount <= 0:
return False
payee = tx.get("payee_name") or tx.get("payee") or ""
if not isinstance(payee, str):
return False
return bool(META_PAYEE_RE.search(payee))
async def _upsert(
    db_session_factory: async_sessionmaker[Any],
    tx: dict[str, Any],
) -> bool:
    """Insert the row; return True if newly inserted, False if it already existed.

    Uses a dialect-aware ON CONFLICT DO NOTHING upsert — Postgres in prod and
    SQLite in tests both support this. The existence check and the insert
    share one session and one transaction.

    Args:
        db_session_factory: Factory producing async sessions.
        tx: Transaction dict. ``id``, ``amount`` and ``date`` are required;
            payee (``payee_name`` / ``payee``) and memo (``notes`` /
            ``memo``) are optional and stored as NULL when empty.

    Raises:
        KeyError: If ``id``, ``amount`` or ``date`` is missing from ``tx``.
        ValueError: If ``date`` is not in ``YYYY-MM-DD`` form.
    """
    tx_id = tx["id"]
    # Amounts arrive in cents; convert to a two-decimal Decimal.
    amount_cents = int(tx["amount"])
    amount = (Decimal(amount_cents) / Decimal(100)).quantize(Decimal("0.01"))
    deposit_date = _parse_date(tx["date"])
    payee = tx.get("payee_name") or tx.get("payee") or None
    memo = tx.get("notes") or tx.get("memo") or None

    async with db_session_factory() as session, session.begin():
        # Cheap read first: the common case on re-runs is "already there",
        # which then needs no write at all.
        existing = await session.execute(
            select(ExternalMetaDeposit.id).where(
                ExternalMetaDeposit.actualbudget_tx_id == tx_id))
        if existing.scalar() is not None:
            return False

        bind = session.bind
        dialect = bind.dialect.name if bind is not None else "postgresql"
        stmt_cls = pg_insert if dialect == "postgresql" else sqlite_insert
        stmt = stmt_cls(ExternalMetaDeposit).values(
            actualbudget_tx_id=tx_id,
            deposit_date=deposit_date,
            amount=amount,
            payee=payee,
            memo=memo,
        ).on_conflict_do_nothing(index_elements=[ExternalMetaDeposit.actualbudget_tx_id])
        result = await session.execute(stmt)

        # If another writer inserted the same tx between the check and the
        # insert, ON CONFLICT DO NOTHING affects zero rows; report that as
        # "already existed". A driver that cannot report a count (-1) is
        # treated as inserted, matching the previous behavior.
        return getattr(result, "rowcount", -1) != 0
def _parse_date(raw: str) -> date:
return datetime.strptime(raw, "%Y-%m-%d").date()
# Public API of this module. The underscore-prefixed helpers
# (_is_meta_deposit, _upsert, _parse_date) are intentionally not exported.
__all__ = [
"ActualBudgetClient",
"ActualBudgetError",
"META_PAYEE_RE",
"SyncResult",
"sync_meta_deposits",
]