Add crawl job progress drawer with phase tracking and live logs

- Add phase-aware progress reporting across all crawl phases (splitting,
  fetching, filtering, processing) with per-step counters
- Add TaskProgressDrawer component with phase timeline stepper, detail
  counters, progress bar with ETA, and live worker log viewer
- Add on_step_complete callback to ListingProcessor for granular tracking
  of details/images/OCR steps
- Extend QuerySplitter on_progress callback with structured counter data
- Capture Celery worker logs via a ring-buffer handler and inject them into
  task state updates for frontend display
- Guard taskResult updates with a phase-presence check to prevent the drawer
  from blanking during state transitions
This commit is contained in:
Viktor Barzin 2026-02-06 22:37:53 +00:00
parent 4018503723
commit b4837e1603
No known key found for this signature in database
GPG key ID: 0EB088298288D958
6 changed files with 617 additions and 24 deletions

View file

@ -238,6 +238,8 @@ class QuerySplitter:
parameters: Original query parameters to split.
session: aiohttp session for making requests.
on_progress: Optional callback for progress updates.
Called with keyword arguments as on_progress(phase=..., message=..., **kwargs),
where kwargs carries structured counters such as subqueries_initial,
subqueries_probed, subqueries_total, and estimated_results.
Returns:
List of SubQuery objects, each under the result threshold.
@ -260,19 +262,32 @@ class QuerySplitter:
on_progress(
phase="splitting",
message=f"Created {len(initial_subqueries)} initial subqueries",
subqueries_initial=len(initial_subqueries),
subqueries_probed=0,
)
# Phase 2: Probe and adaptively split
semaphore = asyncio.Semaphore(self.config.max_concurrent_requests)
refined_subqueries: list[SubQuery] = []
probed_count = 0
# Probe all initial subqueries in parallel
async def probe_and_split(sq: SubQuery) -> list[SubQuery]:
nonlocal probed_count
async with semaphore:
await asyncio.sleep(self.config.request_delay_ms / 1000)
count = await self.probe_result_count(sq, session, parameters)
sq = replace(sq, estimated_results=count)
probed_count += 1
if on_progress:
on_progress(
phase="splitting",
message=f"Probed {probed_count}/{len(initial_subqueries)} subqueries",
subqueries_initial=len(initial_subqueries),
subqueries_probed=probed_count,
)
if count > self.config.split_threshold:
logger.info(
@ -294,10 +309,14 @@ class QuerySplitter:
f"Refined to {len(refined_subqueries)} subqueries after splitting"
)
total_estimated = self.calculate_total_estimated_results(refined_subqueries)
if on_progress:
on_progress(
phase="splitting_complete",
message=f"Refined to {len(refined_subqueries)} subqueries",
subqueries_total=len(refined_subqueries),
estimated_results=total_estimated,
)
return refined_subqueries