Convert listings dump to asyncio: run all district/page listing queries concurrently via asyncio.gather

This commit is contained in:
Viktor Barzin 2025-05-17 21:55:42 +00:00
parent df24c2c1b7
commit ad879f2d4f
No known key found for this signature in database
GPG key ID: 4056458DBDBF8863
3 changed files with 35 additions and 34 deletions

View file

@ -1,3 +1,4 @@
import asyncio
from dataclasses import dataclass from dataclasses import dataclass
import pathlib import pathlib
from rec.query import ListingType, listing_query from rec.query import ListingType, listing_query
@ -18,7 +19,7 @@ class QueryParameters:
max_days_since_added: int | None = None max_days_since_added: int | None = None
def dump_listings( async def dump_listings(
parameters: QueryParameters, parameters: QueryParameters,
data_dir: pathlib.Path = pathlib.Path("data/rs/"), data_dir: pathlib.Path = pathlib.Path("data/rs/"),
) -> list[Listing]: ) -> list[Listing]:
@ -29,11 +30,10 @@ def dump_listings(
} }
print("Valid districts to scrape:", districts.keys()) print("Valid districts to scrape:", districts.keys())
listings = [] listings = []
for district, locid in districts.items():
print("#### District:", district) json_responses = await asyncio.gather(
for i in [1, 2]: *[
try: listing_query(
response_json = listing_query(
page=i, page=i,
channel=parameters.listing_type, channel=parameters.listing_type,
min_bedrooms=parameters.min_bedrooms, min_bedrooms=parameters.min_bedrooms,
@ -44,23 +44,23 @@ def dump_listings(
location_id=locid, location_id=locid,
page_size=parameters.page_size, page_size=parameters.page_size,
max_days_since_added=parameters.max_days_since_added, max_days_since_added=parameters.max_days_since_added,
) for locid in districts.values() for i in [1, 2]
]
) )
except Exception as e: listings = []
print(e) for response_json in json_responses:
break if response_json["totalAvailableResults"] == 0:
if i == 1: print("No results found")
continue
if response_json["totalAvailableResults"] > 0:
print("totalAvailableResults: ", response_json["totalAvailableResults"]) print("totalAvailableResults: ", response_json["totalAvailableResults"])
if len(response_json["properties"]) == 0:
break
print(f"page {i}", end=", ", flush=True)
for property in response_json["properties"]: for property in response_json["properties"]:
identifier = property["identifier"] identifier = property["identifier"]
listing = Listing(identifier, data_dir=data_dir) listing = Listing(identifier, data_dir=data_dir)
listing.dump_listing(property) listing.dump_listing(property)
listings.append(listing) listings.append(listing)
print() # break line as we used end=, above.
return listings return listings

View file

@ -1,3 +1,4 @@
import asyncio
import pathlib import pathlib
import click import click
import importlib import importlib
@ -107,12 +108,12 @@ def dump_listings(
f'{query_parameters}' f'{query_parameters}'
) )
data_dir_path = pathlib.Path(data_dir) data_dir_path = pathlib.Path(data_dir)
dump_listings_module.dump_listings(query_parameters, data_dir_path) asyncio.run(dump_listings_module.dump_listings(query_parameters, data_dir_path))
@cli.command() @cli.command()
@click.pass_context @click.pass_context
def dump_detail(ctx: click.core.Context): def dump_details(ctx: click.core.Context):
data_dir = ctx.obj['data_dir'] data_dir = ctx.obj['data_dir']
click.echo(f'Running dump_detail for listings stored in {data_dir}') click.echo(f'Running dump_detail for listings stored in {data_dir}')
listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json"))) listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json")))

View file

@ -46,7 +46,7 @@ def detail_query(detail_id: int):
return response.json() return response.json()
def listing_query( async def listing_query(
page: int, page: int,
channel: ListingType, channel: ListingType,
min_bedrooms: int, min_bedrooms: int,