diff --git a/payslip_ingest/__main__.py b/payslip_ingest/__main__.py index ffce922..f30293d 100644 --- a/payslip_ingest/__main__.py +++ b/payslip_ingest/__main__.py @@ -52,14 +52,24 @@ async def _backfill(tag: str, limit: int | None) -> None: bearer_token=os.environ["CLAUDE_AGENT_BEARER_TOKEN"], ) processed = 0 + failed = 0 try: async for doc in paperless.list_tagged_documents(tag): if limit is not None and processed >= limit: break doc_id = int(doc["id"]) - result = await process_document(doc_id, session_factory, paperless, extractor) - click.echo(f"doc_id={doc_id} status={result.status} validated={result.validated}") + try: + result = await process_document(doc_id, session_factory, paperless, extractor) + click.echo(f"doc_id={doc_id} status={result.status} validated={result.validated}") + except Exception as exc: + # Don't let a single bad doc (wrong tag, non-payslip PDF, Claude + # hallucinating null fields) abort the whole backfill. Log + continue. + failed += 1 + click.echo(f"doc_id={doc_id} status=failed error={type(exc).__name__}: {exc}", + err=True) + log.exception("backfill: doc_id=%s failed", doc_id) processed += 1 + click.echo(f"backfill complete: processed={processed} failed={failed}") finally: await paperless.aclose() await extractor.aclose()