diff --git a/crawler/tasks/listing_tasks.py b/crawler/tasks/listing_tasks.py index 3233947..f86f89e 100644 --- a/crawler/tasks/listing_tasks.py +++ b/crawler/tasks/listing_tasks.py @@ -59,6 +59,13 @@ async def dump_listings_full( ) logger.info(f"Found {len(ids_to_process)} listings to process") + if len(ids_to_process) == 0: + task.update_state( + state="No new listings found", + meta={"progress": 1, "processed": 0, "total": 0, "message": "All listings are up to date"}, + ) + return [] + listing_processor = ListingProcessor(repository) logger.info(f"Starting processing {len(ids_to_process)} listings") return await dump_listings_and_monitor( @@ -153,8 +160,8 @@ async def get_ids_to_process( # if listing is already in db, do not fetch details again all_listing_ids = {l.id for l in await repository.get_listings()} - all_ids = all_listing_ids.union(identifiers) - return all_ids + new_ids = identifiers - all_listing_ids + return new_ids async def get_valid_districts_to_scrape(