Stream-process listings as IDs arrive via asyncio.Queue

Replace the sequential fetch-all-then-process pipeline with a streaming
architecture where listing processing starts as soon as IDs become
available from each subquery. A producer task fetches pages and enqueues
new IDs (filtered inline against the DB), while 20 consumer workers process
listings concurrently from the queue.

- Add ListingRepository.get_listing_ids() for fast ID-only projection
- Refactor listing_tasks.py: remove get_ids_to_process/dump_listings_and_monitor,
  replace with unified producer/worker/monitor pipeline
- Apply same pattern to CLI path in listing_fetcher.py
- Remove the 'filtering' phase from the frontend and show combined fetch+process metrics
- Add fetching_done flag to TaskResult for phase transition tracking
This commit is contained in:
Viktor Barzin 2026-02-06 23:43:54 +00:00
parent 7e8f1f0339
commit b9f576ae2b
No known key found for this signature in database
GPG key ID: 0EB088298288D958
6 changed files with 372 additions and 420 deletions

View file

@ -180,7 +180,6 @@ export function TaskIndicator({ taskID, onTaskCancelled }: TaskIndicatorProps) {
splitting: 'Splitting',
splitting_complete: 'Split done',
fetching: 'Fetching',
filtering: 'Filtering',
};
return phaseLabels[taskResult.phase] ?? `${Math.round(progressPercentage)}%`;
}

View file

@ -23,9 +23,8 @@ interface TaskProgressDrawerProps {
const PHASES: { key: TaskPhase; label: string }[] = [
{ key: 'splitting', label: 'Splitting queries' },
{ key: 'fetching', label: 'Fetching listings' },
{ key: 'filtering', label: 'Filtering results' },
{ key: 'processing', label: 'Processing listings' },
{ key: 'fetching', label: 'Fetching & processing' },
{ key: 'processing', label: 'Processing remaining' },
];
function getPhaseIndex(phase: TaskPhase | undefined): number {
@ -175,7 +174,7 @@ function PhaseDetails({ result }: { result: TaskResult }) {
return (
<div className="rounded-md border p-3 space-y-1">
<p className="text-xs font-medium text-muted-foreground uppercase tracking-wide mb-2">
Fetching
{result.fetching_done ? 'Fetching complete' : 'Fetching & processing'}
</p>
<CounterRow
label="Subqueries completed"
@ -184,19 +183,24 @@ function PhaseDetails({ result }: { result: TaskResult }) {
/>
<CounterRow label="IDs collected" value={result.ids_collected} />
<CounterRow label="Pages fetched" value={result.pages_fetched} />
</div>
);
}
if (phase === 'filtering') {
return (
<div className="rounded-md border p-3 space-y-1">
<p className="text-xs font-medium text-muted-foreground uppercase tracking-wide mb-2">
Filtering
</p>
<CounterRow label="Total from API" value={result.total_found} />
<CounterRow label="Already in DB" value={result.existing_in_db} />
<CounterRow label="New to process" value={result.new_listings} />
{(result.details_fetched !== undefined && result.details_fetched > 0) && (
<>
<div className="border-t my-2" />
<CounterRow
label="Details fetched"
value={result.details_fetched}
total={result.total}
/>
<CounterRow label="Images downloaded" value={result.images_downloaded} />
<CounterRow label="OCR completed" value={result.ocr_completed} />
{(result.failed ?? 0) > 0 && (
<div className="flex justify-between text-sm">
<span className="text-red-500">Failed</span>
<span className="font-mono tabular-nums text-red-500">{result.failed}</span>
</div>
)}
</>
)}
</div>
);
}
@ -306,7 +310,7 @@ export function TaskProgressDrawer({
{taskResult && <PhaseDetails result={taskResult} />}
{taskResult && taskResult.phase === 'processing' && (
{taskResult && (taskResult.phase === 'processing' || taskResult.phase === 'fetching') && (taskResult.total ?? 0) > 0 && (
<div className="space-y-1">
<div className="w-full h-2 bg-primary/20 rounded-full overflow-hidden">
<div

View file

@ -52,7 +52,7 @@ export interface TaskStatusResponse {
message?: string;
}
export type TaskPhase = 'splitting' | 'splitting_complete' | 'fetching' | 'filtering' | 'processing' | 'completed';
export type TaskPhase = 'splitting' | 'splitting_complete' | 'fetching' | 'processing' | 'completed';
export interface TaskResult {
progress: number;
@ -69,10 +69,7 @@ export interface TaskResult {
subqueries_completed?: number;
ids_collected?: number;
pages_fetched?: number;
// Filtering phase
total_found?: number;
existing_in_db?: number;
new_listings?: number;
fetching_done?: boolean;
// Processing phase
details_fetched?: number;
images_downloaded?: number;