split listing query into multiple subqueries to avoid hitting the 1.5k rightmove limit
This commit is contained in:
parent
241b8d1842
commit
a055c92dea
1 changed file with 45 additions and 25 deletions
|
|
@ -108,32 +108,52 @@ async def _fetch_listings_with_semaphore(
|
|||
district: str,
|
||||
) -> list[dict[str, Any]]:
|
||||
result = []
|
||||
# we don't know how many pages we have but we stop as soon as there's no more
|
||||
for page_id in range(1, 3):
|
||||
logger.debug(f"Processing {page_id=} for {district=}")
|
||||
# seems like all searches stop at 1500 entries (page_id * page_size)
|
||||
async with semaphore:
|
||||
try:
|
||||
listing_query_result = await listing_query(
|
||||
page=page_id,
|
||||
channel=parameters.listing_type,
|
||||
min_bedrooms=parameters.min_bedrooms,
|
||||
max_bedrooms=parameters.max_bedrooms,
|
||||
radius=parameters.radius,
|
||||
min_price=parameters.min_price,
|
||||
max_price=parameters.max_price,
|
||||
district=district,
|
||||
page_size=parameters.page_size,
|
||||
max_days_since_added=parameters.max_days_since_added,
|
||||
furnish_types=parameters.furnish_types or [],
|
||||
)
|
||||
# split the price in N bands to avoid the 1.5k capping by rightmove
|
||||
# basically instead of 1 query with price between 1k and 5k that is capped at 1500 results
|
||||
# we do 10 queries each with an increment in price range so we send more queries but each
|
||||
# has a smaller chance of returning more than 1.5k results
|
||||
|
||||
except Exception as e:
|
||||
if "GENERIC_ERROR" in str(e): # Too big page id
|
||||
logger.debug(f"Max page id for {district=}: {page_id-1}")
|
||||
break
|
||||
raise e
|
||||
result.append(listing_query_result)
|
||||
number_of_steps = 10
|
||||
price_step = parameters.max_price // number_of_steps
|
||||
|
||||
for step in range(number_of_steps):
|
||||
min_price = step * price_step
|
||||
max_price = (step + 1) * price_step
|
||||
logger.debug(
|
||||
f"Step {step} of {number_of_steps} with {min_price=} and {max_price=}"
|
||||
)
|
||||
|
||||
for num_bedrooms in range(parameters.min_bedrooms, parameters.max_bedrooms + 1):
|
||||
for page_id in range(
|
||||
1,
|
||||
3, # seems like all searches stop at 1500 entries (page_id * page_size)
|
||||
):
|
||||
logger.debug(f"Processing {page_id=} for {district=}")
|
||||
|
||||
async with semaphore:
|
||||
try:
|
||||
listing_query_result = await listing_query(
|
||||
page=page_id,
|
||||
channel=parameters.listing_type,
|
||||
# min_bedrooms=parameters.min_bedrooms,
|
||||
# max_bedrooms=parameters.max_bedrooms,
|
||||
min_bedrooms=num_bedrooms,
|
||||
max_bedrooms=num_bedrooms,
|
||||
radius=parameters.radius,
|
||||
min_price=min_price,
|
||||
max_price=max_price,
|
||||
district=district,
|
||||
page_size=parameters.page_size,
|
||||
max_days_since_added=parameters.max_days_since_added,
|
||||
furnish_types=parameters.furnish_types or [],
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
if "GENERIC_ERROR" in str(e): # Too big page id
|
||||
logger.debug(f"Max page id for {district=}: {page_id-1}")
|
||||
break
|
||||
raise e
|
||||
result.append(listing_query_result)
|
||||
return result
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue