Add crawl job progress drawer with phase tracking and live logs
- Add phase-aware progress reporting across all crawl phases (splitting, fetching, filtering, processing) with per-step counters
- Add TaskProgressDrawer component with phase timeline stepper, detail counters, progress bar with ETA, and live worker log viewer
- Add on_step_complete callback to ListingProcessor for granular tracking of details/images/OCR steps
- Extend QuerySplitter on_progress callback with structured counter data
- Capture celery worker logs via ring buffer handler and inject into task state updates for frontend display
- Guard taskResult updates with phase presence check to prevent drawer from blanking during state transitions
This commit is contained in:
parent
4018503723
commit
b4837e1603
6 changed files with 617 additions and 24 deletions
|
|
@@ -1,6 +1,7 @@
|
|||
from __future__ import annotations
|
||||
from abc import abstractmethod
|
||||
import asyncio
|
||||
from collections.abc import Callable
|
||||
from datetime import datetime
|
||||
import logging
|
||||
import multiprocessing
|
||||
|
|
@@ -22,6 +23,13 @@ class ListingProcessor:
|
|||
process_steps: list[Step]
|
||||
listing_repository: ListingRepository
|
||||
|
||||
# Map step class names to short names for progress reporting
|
||||
STEP_NAMES: dict[str, str] = {
|
||||
"FetchListingDetailsStep": "details",
|
||||
"FetchImagesStep": "images",
|
||||
"DetectFloorplanStep": "ocr",
|
||||
}
|
||||
|
||||
def __init__(self, listing_repository: ListingRepository):
|
||||
self.semaphore = asyncio.Semaphore(20)
|
||||
self.listing_repository = listing_repository
|
||||
|
|
@@ -33,19 +41,28 @@ class ListingProcessor:
|
|||
DetectFloorplanStep(listing_repository),
|
||||
]
|
||||
|
||||
async def process_listing(self, listing_id: int) -> Listing | None:
|
||||
async def process_listing(
|
||||
self,
|
||||
listing_id: int,
|
||||
on_step_complete: Callable[[str], None] | None = None,
|
||||
) -> Listing | None:
|
||||
await self.listing_repository.mark_seen(listing_id)
|
||||
listing = None
|
||||
for step in self.process_steps:
|
||||
if await step.needs_processing(listing_id):
|
||||
async with self.semaphore:
|
||||
step_name = step.__class__.__name__
|
||||
step_class_name = step.__class__.__name__
|
||||
try:
|
||||
listing = await step.process(listing_id)
|
||||
logger.debug(f"[{listing_id}] {step_name} completed")
|
||||
logger.debug(f"[{listing_id}] {step_class_name} completed")
|
||||
if on_step_complete:
|
||||
short_name = self.STEP_NAMES.get(
|
||||
step_class_name, step_class_name
|
||||
)
|
||||
on_step_complete(short_name)
|
||||
except Exception as e:
|
||||
logger.error(f"[{listing_id}] {step_name} failed: {e}")
|
||||
celery_logger.error(f"[{listing_id}] {step_name} failed: {e}")
|
||||
logger.error(f"[{listing_id}] {step_class_name} failed: {e}")
|
||||
celery_logger.error(f"[{listing_id}] {step_class_name} failed: {e}")
|
||||
return None
|
||||
return listing
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue