Add crawl job progress drawer with phase tracking and live logs
- Add phase-aware progress reporting across all crawl phases (splitting, fetching, filtering, processing) with per-step counters
- Add TaskProgressDrawer component with phase timeline stepper, detail counters, progress bar with ETA, and live worker log viewer
- Add on_step_complete callback to ListingProcessor for granular tracking of details/images/OCR steps
- Extend QuerySplitter on_progress callback with structured counter data
- Capture Celery worker logs via ring buffer handler and inject into task state updates for frontend display
- Guard taskResult updates with phase presence check to prevent drawer from blanking during state transitions
This commit is contained in:
parent
4018503723
commit
b4837e1603
6 changed files with 617 additions and 24 deletions
|
|
@@ -238,6 +238,8 @@ class QuerySplitter:
|
|||
parameters: Original query parameters to split.
|
||||
session: aiohttp session for making requests.
|
||||
on_progress: Optional callback for progress updates.
|
||||
Called as on_progress(phase, message, **kwargs) where kwargs
|
||||
contains structured data like subqueries_probed, etc.
|
||||
|
||||
Returns:
|
||||
List of SubQuery objects, each under the result threshold.
|
||||
|
|
@@ -260,19 +262,32 @@ class QuerySplitter:
|
|||
on_progress(
|
||||
phase="splitting",
|
||||
message=f"Created {len(initial_subqueries)} initial subqueries",
|
||||
subqueries_initial=len(initial_subqueries),
|
||||
subqueries_probed=0,
|
||||
)
|
||||
|
||||
# Phase 2: Probe and adaptively split
|
||||
semaphore = asyncio.Semaphore(self.config.max_concurrent_requests)
|
||||
refined_subqueries: list[SubQuery] = []
|
||||
probed_count = 0
|
||||
|
||||
# Probe all initial subqueries in parallel
|
||||
async def probe_and_split(sq: SubQuery) -> list[SubQuery]:
|
||||
nonlocal probed_count
|
||||
async with semaphore:
|
||||
await asyncio.sleep(self.config.request_delay_ms / 1000)
|
||||
count = await self.probe_result_count(sq, session, parameters)
|
||||
|
||||
sq = replace(sq, estimated_results=count)
|
||||
probed_count += 1
|
||||
|
||||
if on_progress:
|
||||
on_progress(
|
||||
phase="splitting",
|
||||
message=f"Probed {probed_count}/{len(initial_subqueries)} subqueries",
|
||||
subqueries_initial=len(initial_subqueries),
|
||||
subqueries_probed=probed_count,
|
||||
)
|
||||
|
||||
if count > self.config.split_threshold:
|
||||
logger.info(
|
||||
|
|
@@ -294,10 +309,14 @@ class QuerySplitter:
|
|||
f"Refined to {len(refined_subqueries)} subqueries after splitting"
|
||||
)
|
||||
|
||||
total_estimated = self.calculate_total_estimated_results(refined_subqueries)
|
||||
|
||||
if on_progress:
|
||||
on_progress(
|
||||
phase="splitting_complete",
|
||||
message=f"Refined to {len(refined_subqueries)} subqueries",
|
||||
subqueries_total=len(refined_subqueries),
|
||||
estimated_results=total_estimated,
|
||||
)
|
||||
|
||||
return refined_subqueries
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue