Stream-process listings as IDs arrive via asyncio.Queue
Replace the sequential fetch-all-then-process pipeline with a streaming architecture in which listing processing starts as soon as IDs become available from each subquery. A producer task fetches pages and enqueues new IDs (filtered inline against the DB), while 20 consumer workers process listings concurrently from the queue.
- Add ListingRepository.get_listing_ids() for fast ID-only projection.
- Refactor listing_tasks.py: remove get_ids_to_process/dump_listings_and_monitor and replace them with a unified producer/worker/monitor pipeline.
- Apply the same pattern to the CLI path in listing_fetcher.py.
- Remove the 'filtering' phase from the frontend and show combined fetch+process metrics.
- Add a fetching_done flag to TaskResult for phase-transition tracking.
This commit is contained in:
parent
7e8f1f0339
commit
b9f576ae2b
6 changed files with 372 additions and 420 deletions
|
|
@ -180,7 +180,6 @@ export function TaskIndicator({ taskID, onTaskCancelled }: TaskIndicatorProps) {
|
|||
splitting: 'Splitting',
|
||||
splitting_complete: 'Split done',
|
||||
fetching: 'Fetching',
|
||||
filtering: 'Filtering',
|
||||
};
|
||||
return phaseLabels[taskResult.phase] ?? `${Math.round(progressPercentage)}%`;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,9 +23,8 @@ interface TaskProgressDrawerProps {
|
|||
|
||||
const PHASES: { key: TaskPhase; label: string }[] = [
|
||||
{ key: 'splitting', label: 'Splitting queries' },
|
||||
{ key: 'fetching', label: 'Fetching listings' },
|
||||
{ key: 'filtering', label: 'Filtering results' },
|
||||
{ key: 'processing', label: 'Processing listings' },
|
||||
{ key: 'fetching', label: 'Fetching & processing' },
|
||||
{ key: 'processing', label: 'Processing remaining' },
|
||||
];
|
||||
|
||||
function getPhaseIndex(phase: TaskPhase | undefined): number {
|
||||
|
|
@ -175,7 +174,7 @@ function PhaseDetails({ result }: { result: TaskResult }) {
|
|||
return (
|
||||
<div className="rounded-md border p-3 space-y-1">
|
||||
<p className="text-xs font-medium text-muted-foreground uppercase tracking-wide mb-2">
|
||||
Fetching
|
||||
{result.fetching_done ? 'Fetching complete' : 'Fetching & processing'}
|
||||
</p>
|
||||
<CounterRow
|
||||
label="Subqueries completed"
|
||||
|
|
@ -184,19 +183,24 @@ function PhaseDetails({ result }: { result: TaskResult }) {
|
|||
/>
|
||||
<CounterRow label="IDs collected" value={result.ids_collected} />
|
||||
<CounterRow label="Pages fetched" value={result.pages_fetched} />
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
if (phase === 'filtering') {
|
||||
return (
|
||||
<div className="rounded-md border p-3 space-y-1">
|
||||
<p className="text-xs font-medium text-muted-foreground uppercase tracking-wide mb-2">
|
||||
Filtering
|
||||
</p>
|
||||
<CounterRow label="Total from API" value={result.total_found} />
|
||||
<CounterRow label="Already in DB" value={result.existing_in_db} />
|
||||
<CounterRow label="New to process" value={result.new_listings} />
|
||||
{(result.details_fetched !== undefined && result.details_fetched > 0) && (
|
||||
<>
|
||||
<div className="border-t my-2" />
|
||||
<CounterRow
|
||||
label="Details fetched"
|
||||
value={result.details_fetched}
|
||||
total={result.total}
|
||||
/>
|
||||
<CounterRow label="Images downloaded" value={result.images_downloaded} />
|
||||
<CounterRow label="OCR completed" value={result.ocr_completed} />
|
||||
{(result.failed ?? 0) > 0 && (
|
||||
<div className="flex justify-between text-sm">
|
||||
<span className="text-red-500">Failed</span>
|
||||
<span className="font-mono tabular-nums text-red-500">{result.failed}</span>
|
||||
</div>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
|
@ -306,7 +310,7 @@ export function TaskProgressDrawer({
|
|||
|
||||
{taskResult && <PhaseDetails result={taskResult} />}
|
||||
|
||||
{taskResult && taskResult.phase === 'processing' && (
|
||||
{taskResult && (taskResult.phase === 'processing' || taskResult.phase === 'fetching') && (taskResult.total ?? 0) > 0 && (
|
||||
<div className="space-y-1">
|
||||
<div className="w-full h-2 bg-primary/20 rounded-full overflow-hidden">
|
||||
<div
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ export interface TaskStatusResponse {
|
|||
message?: string;
|
||||
}
|
||||
|
||||
export type TaskPhase = 'splitting' | 'splitting_complete' | 'fetching' | 'filtering' | 'processing' | 'completed';
|
||||
export type TaskPhase = 'splitting' | 'splitting_complete' | 'fetching' | 'processing' | 'completed';
|
||||
|
||||
export interface TaskResult {
|
||||
progress: number;
|
||||
|
|
@ -69,10 +69,7 @@ export interface TaskResult {
|
|||
subqueries_completed?: number;
|
||||
ids_collected?: number;
|
||||
pages_fetched?: number;
|
||||
// Filtering phase
|
||||
total_found?: number;
|
||||
existing_in_db?: number;
|
||||
new_listings?: number;
|
||||
fetching_done?: boolean;
|
||||
// Processing phase
|
||||
details_fetched?: number;
|
||||
images_downloaded?: number;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue