make dumping details async

This commit is contained in:
Viktor Barzin 2025-05-17 22:11:33 +00:00
parent ad879f2d4f
commit 3e7a144fb4
No known key found for this signature in database
GPG key ID: 4056458DBDBF8863
3 changed files with 27 additions and 23 deletions

View file

@ -1,35 +1,39 @@
import asyncio
import json import json
import pathlib import pathlib
from rec.query import detail_query from rec.query import detail_query
from tqdm import tqdm from tqdm.asyncio import tqdm
from data_access import Listing from data_access import Listing
def dump_detail(listing_paths: list[str]): async def dump_detail(listing_paths: list[str]):
incremental = True
listings = Listing.get_all_listings(listing_paths) listings = Listing.get_all_listings(listing_paths)
filtered_listings = [] filtered_listings = await tqdm.gather(
for listing in listings: *[_dump_detail_for_listing(listing) for listing in listings]
# We introduced last_seen later, so not all entries have it. )
# If it doesnt exist then its on the platform anymore. So skip return filtered_listings
last_seen = listing.last_seen
if last_seen is None:
continue
if not incremental and last_seen <= 1:
filtered_listings.append(listing)
if incremental and not listing.path_detail_json().exists(): async def _dump_detail_for_listing(listing: Listing):
filtered_listings.append(listing) incremental = True
# We introduced last_seen later, so not all entries have it.
# If it doesn't exist then the listing is not on the platform anymore, so skip it.
last_seen = listing.last_seen
if last_seen is None:
return
for listing in tqdm(filtered_listings): if not incremental and last_seen <= 1:
try: return
d = detail_query(listing.identifier)
with open(listing.path_detail_json(), "w") as f: if incremental and not listing.path_detail_json().exists():
json.dump(d, f) return
except Exception as e: print('fetching', listing.identifier)
print(e)
# for listing in tqdm(filtered_listings):
d = await detail_query(listing.identifier)
with open(listing.path_detail_json(), "w") as f:
json.dump(d, f)
def main(): def main():

View file

@ -117,7 +117,7 @@ def dump_details(ctx: click.core.Context):
data_dir = ctx.obj['data_dir'] data_dir = ctx.obj['data_dir']
click.echo(f'Running dump_detail for listings stored in {data_dir}') click.echo(f'Running dump_detail for listings stored in {data_dir}')
listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json"))) listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json")))
dump_detail_module.dump_detail(listing_paths) asyncio.run(dump_detail_module.dump_detail(listing_paths))
@cli.command() @cli.command()

View file

@ -30,7 +30,7 @@ class PropertyType(enum.StrEnum):
TERRACED = "terraced" TERRACED = "terraced"
def detail_query(detail_id: int): async def detail_query(detail_id: int):
params = { params = {
"apiApplication": "ANDROID", "apiApplication": "ANDROID",
"appVersion": "3.70.0", "appVersion": "3.70.0",