2025-05-17 22:11:33 +00:00
|
|
|
import asyncio
|
2024-03-10 18:49:39 +00:00
|
|
|
import json
|
|
|
|
|
from rec.query import detail_query
|
2025-05-17 22:11:33 +00:00
|
|
|
from tqdm.asyncio import tqdm
|
2024-03-10 18:49:39 +00:00
|
|
|
|
2024-03-11 14:43:53 +00:00
|
|
|
from data_access import Listing
|
2024-03-10 18:49:39 +00:00
|
|
|
|
2025-05-17 22:34:27 +00:00
|
|
|
# Setting this too high either crashes rightmove or gets us blocked
|
|
|
|
|
# Shared limiter: at most 10 coroutines may be inside the `async with semaphore`
# section (the detail_query call) at the same time.
semaphore = asyncio.Semaphore(10)
|
|
|
|
|
|
2025-05-11 19:02:23 +00:00
|
|
|
|
2025-05-17 22:11:33 +00:00
|
|
|
async def dump_detail(listing_paths: list[str]):
    """Dump the detail JSON for every listing found under ``listing_paths``.

    The listings are loaded with ``Listing.get_all_listings`` and one
    ``_dump_detail_for_listing`` coroutine per listing is run through
    ``tqdm.gather``.

    Args:
        listing_paths: Paths handed to ``Listing.get_all_listings``.

    Returns:
        Whatever ``tqdm.gather`` yields for the per-listing coroutines, i.e.
        one entry per listing holding that coroutine's return value.
    """
    pending = (
        _dump_detail_for_listing(entry)
        for entry in Listing.get_all_listings(listing_paths)
    )
    return await tqdm.gather(*pending)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _dump_detail_for_listing(listing: Listing):
    """Fetch the detail payload for one listing and store it as JSON on disk.

    The listing is skipped when its detail JSON file already exists.
    Otherwise ``detail_query`` is awaited under the module-level
    ``semaphore`` and the result is serialised to
    ``listing.path_detail_json()``.

    The payload is written to a sibling ``*.tmp`` file first and only then
    moved onto the final path. A failed or interrupted dump therefore never
    leaves a truncated file that the existence check would later mistake for
    a completed one.

    Args:
        listing: Listing whose ``identifier`` is queried and whose
            ``path_detail_json()`` names the output file.

    Returns:
        None.
    """
    import os  # local import: only needed for the atomic replace below

    target = listing.path_detail_json()
    if target.exists():
        return

    # Throttle the remote query through the shared semaphore.
    async with semaphore:
        detail = await detail_query(listing.identifier)

    tmp_path = f"{os.fspath(target)}.tmp"
    try:
        with open(tmp_path, "w") as f:
            json.dump(detail, f)
        # Atomic on the same filesystem: readers see either no file or the
        # complete one, never a partial write.
        os.replace(tmp_path, target)
    finally:
        # After a successful replace the temp file no longer exists; on any
        # failure remove the partial file so it does not accumulate.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
|