From ceb943f198645f71f03abef23c94d94953a8bb68 Mon Sep 17 00:00:00 2001
From: Viktor Barzin <vbarzin@gmail.com>
Date: Sun, 1 Feb 2026 22:30:04 +0000
Subject: [PATCH] Fix refresh listings returning immediate success with no
 progress

The get_ids_to_process function was using set union instead of set
difference, causing it to return all existing listing IDs along with
new ones. This meant:

1. When there were no new listings, the task would iterate through all
   existing listings, find nothing to process for each, and complete
   almost instantly.

2. The task showed no progress because processing finished too quickly.

Fixed by:
- Changing `all_listing_ids.union(identifiers)` to `identifiers - all_listing_ids`
  so that only IDs that are NOT already in the database are returned
- Adding an explicit check for an empty set that sets the informative task
  state "No new listings found", so users understand why the task completed
  quickly
---
 crawler/tasks/listing_tasks.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/crawler/tasks/listing_tasks.py b/crawler/tasks/listing_tasks.py
index 3233947..f86f89e 100644
--- a/crawler/tasks/listing_tasks.py
+++ b/crawler/tasks/listing_tasks.py
@@ -59,6 +59,13 @@ async def dump_listings_full(
     )
     logger.info(f"Found {len(ids_to_process)} listings to process")
 
+    if len(ids_to_process) == 0:
+        task.update_state(
+            state="No new listings found",
+            meta={"progress": 1, "processed": 0, "total": 0, "message": "All listings are up to date"},
+        )
+        return []
+
     listing_processor = ListingProcessor(repository)
     logger.info(f"Starting processing {len(ids_to_process)} listings")
     return await dump_listings_and_monitor(
@@ -153,8 +160,8 @@ async def get_ids_to_process(
 
     # if listing is already in db, do not fetch details again
     all_listing_ids = {l.id for l in await repository.get_listings()}
-    all_ids = all_listing_ids.union(identifiers)
-    return all_ids
+    new_ids = identifiers - all_listing_ids
+    return new_ids
 
 
 async def get_valid_districts_to_scrape(