Add crawl job progress drawer with phase tracking and live logs

- Add phase-aware progress reporting across all crawl phases (splitting,
  fetching, filtering, processing) with per-step counters
- Add TaskProgressDrawer component with phase timeline stepper, detail
  counters, progress bar with ETA, and live worker log viewer
- Add on_step_complete callback to ListingProcessor for granular tracking
  of details/images/OCR steps
- Extend QuerySplitter on_progress callback with structured counter data
- Capture Celery worker logs via a ring buffer handler and inject them into
  task state updates for frontend display
- Guard taskResult updates with a phase presence check to prevent the drawer
  from blanking during state transitions
This commit is contained in:
Viktor Barzin 2026-02-06 22:37:53 +00:00
parent 4018503723
commit b4837e1603
No known key found for this signature in database
GPG key ID: 0EB088298288D958
6 changed files with 617 additions and 24 deletions

View file

@ -1,6 +1,7 @@
from __future__ import annotations
from abc import abstractmethod
import asyncio
from collections.abc import Callable
from datetime import datetime
import logging
import multiprocessing
@ -22,6 +23,13 @@ class ListingProcessor:
process_steps: list[Step]
listing_repository: ListingRepository
# Map step class names to the short names passed to progress callbacks.
# process_listing looks a step up here by its class name and falls back to
# the raw class name when the step has no entry.
STEP_NAMES: dict[str, str] = {
"FetchListingDetailsStep": "details",
"FetchImagesStep": "images",
"DetectFloorplanStep": "ocr",
}
def __init__(self, listing_repository: ListingRepository):
self.semaphore = asyncio.Semaphore(20)
self.listing_repository = listing_repository
@ -33,19 +41,28 @@ class ListingProcessor:
DetectFloorplanStep(listing_repository),
]
async def process_listing(
    self,
    listing_id: int,
    on_step_complete: Callable[[str], None] | None = None,
) -> Listing | None:
    """Run every pending processing step for one listing, in order.

    The listing is first marked as seen in the repository. Each step in
    ``self.process_steps`` is then asked whether it still needs to run for
    this listing; steps that do are executed one at a time while holding
    ``self.semaphore``.

    Args:
        listing_id: Identifier of the listing to process.
        on_step_complete: Optional callback invoked after each step that
            ran successfully. It receives the step's short name from
            ``STEP_NAMES``, or the step's class name when the class has no
            entry in that mapping.

    Returns:
        The value returned by the last step that ran, ``None`` if no step
        needed to run, or ``None`` as soon as any step raises.
    """
    await self.listing_repository.mark_seen(listing_id)
    listing = None
    for step in self.process_steps:
        if not await step.needs_processing(listing_id):
            continue
        step_class_name = step.__class__.__name__
        async with self.semaphore:
            # Keep the try body limited to the step itself so that only a
            # genuine step failure is reported as one.
            try:
                listing = await step.process(listing_id)
            except Exception as e:
                logger.error(f"[{listing_id}] {step_class_name} failed: {e}")
                celery_logger.error(f"[{listing_id}] {step_class_name} failed: {e}")
                return None
            logger.debug(f"[{listing_id}] {step_class_name} completed")
            if on_step_complete is not None:
                short_name = self.STEP_NAMES.get(
                    step_class_name, step_class_name
                )
                # Progress reporting is best-effort: a failing callback
                # must not be logged as a step failure or abort the
                # remaining steps for this listing.
                try:
                    on_step_complete(short_name)
                except Exception as e:
                    logger.warning(
                        f"[{listing_id}] on_step_complete({short_name!r}) raised: {e}"
                    )
    return listing