From ceb943f198645f71f03abef23c94d94953a8bb68 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sun, 1 Feb 2026 22:30:04 +0000 Subject: [PATCH] Fix refresh listings returning immediate success with no progress The get_ids_to_process function was using set union instead of set difference, causing it to return all existing listing IDs along with new ones. This meant: 1. When there were no new listings, the task iterated over every existing listing, found nothing to process for any of them, and completed almost instantly. 2. The task showed no progress because processing finished too quickly. Fixed by: - Changing `all_listing_ids.union(identifiers)` to `identifiers - all_listing_ids` so that only IDs that are NOT already in the database are returned. - Adding an explicit check for an empty set that sets an informative task state, "No new listings found", so users understand why the task completed quickly. --- crawler/tasks/listing_tasks.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/crawler/tasks/listing_tasks.py b/crawler/tasks/listing_tasks.py index 3233947..f86f89e 100644 --- a/crawler/tasks/listing_tasks.py +++ b/crawler/tasks/listing_tasks.py @@ -59,6 +59,13 @@ async def dump_listings_full( ) logger.info(f"Found {len(ids_to_process)} listings to process") + if len(ids_to_process) == 0: + task.update_state( + state="No new listings found", + meta={"progress": 1, "processed": 0, "total": 0, "message": "All listings are up to date"}, + ) + return [] + listing_processor = ListingProcessor(repository) logger.info(f"Starting processing {len(ids_to_process)} listings") return await dump_listings_and_monitor( @@ -153,8 +160,8 @@ async def get_ids_to_process( # if listing is already in db, do not fetch details again all_listing_ids = {l.id for l in await repository.get_listings()} - all_ids = all_listing_ids.union(identifiers) - return all_ids + new_ids = identifiers - all_listing_ids + return new_ids async def get_valid_districts_to_scrape(