Limit the number of concurrent requests when dumping listings, as Rightmove blocks us
This commit is contained in:
parent
24bf44caf9
commit
9735db72a0
2 changed files with 17 additions and 9 deletions
|
|
@ -39,6 +39,7 @@ async def dump_listings(
|
||||||
print("Valid districts to scrape:", districts.keys())
|
print("Valid districts to scrape:", districts.keys())
|
||||||
listings = []
|
listings = []
|
||||||
|
|
||||||
|
semaphore = asyncio.Semaphore(5) # if too high, rightmove drops connections
|
||||||
json_responses = await asyncio.gather(
|
json_responses = await asyncio.gather(
|
||||||
*[
|
*[
|
||||||
listing_query(
|
listing_query(
|
||||||
|
|
@ -53,6 +54,7 @@ async def dump_listings(
|
||||||
page_size=parameters.page_size,
|
page_size=parameters.page_size,
|
||||||
max_days_since_added=parameters.max_days_since_added,
|
max_days_since_added=parameters.max_days_since_added,
|
||||||
furnish_types=parameters.furnish_types or [],
|
furnish_types=parameters.furnish_types or [],
|
||||||
|
semaphore=semaphore,
|
||||||
)
|
)
|
||||||
for locid in districts.values()
|
for locid in districts.values()
|
||||||
for i in [1, 2]
|
for i in [1, 2]
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
# from diskcache import Cache
|
# from diskcache import Cache
|
||||||
|
import asyncio
|
||||||
import enum
|
import enum
|
||||||
from typing import Any
|
from typing import Any
|
||||||
import aiohttp
|
import aiohttp
|
||||||
|
|
@ -64,6 +65,7 @@ async def listing_query(
|
||||||
property_type: list[PropertyType] = [],
|
property_type: list[PropertyType] = [],
|
||||||
page_size: int = 25,
|
page_size: int = 25,
|
||||||
furnish_types: list[FurnishType] = [],
|
furnish_types: list[FurnishType] = [],
|
||||||
|
semaphore: asyncio.Semaphore | None = None,
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
params: dict[str, str] = {
|
params: dict[str, str] = {
|
||||||
"locationIdentifier": location_id,
|
"locationIdentifier": location_id,
|
||||||
|
|
@ -106,12 +108,16 @@ async def listing_query(
|
||||||
"Connection": "keep-alive",
|
"Connection": "keep-alive",
|
||||||
}
|
}
|
||||||
|
|
||||||
async with aiohttp.ClientSession(trust_env=True) as session:
|
if semaphore is None:
|
||||||
async with session.get(
|
semaphore = asyncio.Semaphore(1)
|
||||||
"https://api.rightmove.co.uk/api/property-listing",
|
|
||||||
params=params,
|
async with semaphore:
|
||||||
headers=headers,
|
async with aiohttp.ClientSession(trust_env=True) as session:
|
||||||
) as response:
|
async with session.get(
|
||||||
if response.status != 200:
|
"https://api.rightmove.co.uk/api/property-listing",
|
||||||
raise Exception(f"Failed due to: {await response.text()}")
|
params=params,
|
||||||
return await response.json()
|
headers=headers,
|
||||||
|
) as response:
|
||||||
|
if response.status != 200:
|
||||||
|
raise Exception(f"Failed due to: {await response.text()}")
|
||||||
|
return await response.json()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue