use aiohttp to fetch details concurrently
This commit is contained in:
parent
61b8c82592
commit
01ac24b4b7
5 changed files with 602 additions and 21 deletions
|
|
@ -6,6 +6,9 @@ from tqdm.asyncio import tqdm
|
|||
|
||||
from data_access import Listing
|
||||
|
||||
# Setting this too high either crashes rightmove or gets us blocked
|
||||
semaphore = asyncio.Semaphore(10)
|
||||
|
||||
|
||||
async def dump_detail(listing_paths: list[str]):
|
||||
listings = Listing.get_all_listings(listing_paths)
|
||||
|
|
@ -17,21 +20,12 @@ async def dump_detail(listing_paths: list[str]):
|
|||
|
||||
async def _dump_detail_for_listing(listing: Listing):
|
||||
incremental = True
|
||||
# We introduced last_seen later, so not all entries have it.
|
||||
# If it doesn't exist then it's not on the platform anymore, so skip.
|
||||
last_seen = listing.last_seen
|
||||
if last_seen is None:
|
||||
return
|
||||
|
||||
if not incremental and last_seen <= 1:
|
||||
return
|
||||
|
||||
if incremental and not listing.path_detail_json().exists():
|
||||
return
|
||||
print('fetching', listing.identifier)
|
||||
|
||||
# for listing in tqdm(filtered_listings):
|
||||
d = await detail_query(listing.identifier)
|
||||
async with semaphore:
|
||||
d = await detail_query(listing.identifier)
|
||||
with open(listing.path_detail_json(), "w") as f:
|
||||
json.dump(d, f)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue