convert listings dump to asyncio
This commit is contained in:
parent
df24c2c1b7
commit
ad879f2d4f
3 changed files with 35 additions and 34 deletions
|
|
@ -1,3 +1,4 @@
|
||||||
|
import asyncio
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
import pathlib
|
import pathlib
|
||||||
from rec.query import ListingType, listing_query
|
from rec.query import ListingType, listing_query
|
||||||
|
|
@ -18,7 +19,7 @@ class QueryParameters:
|
||||||
max_days_since_added: int | None = None
|
max_days_since_added: int | None = None
|
||||||
|
|
||||||
|
|
||||||
def dump_listings(
|
async def dump_listings(
|
||||||
parameters: QueryParameters,
|
parameters: QueryParameters,
|
||||||
data_dir: pathlib.Path = pathlib.Path("data/rs/"),
|
data_dir: pathlib.Path = pathlib.Path("data/rs/"),
|
||||||
) -> list[Listing]:
|
) -> list[Listing]:
|
||||||
|
|
@ -29,38 +30,37 @@ def dump_listings(
|
||||||
}
|
}
|
||||||
print("Valid districts to scrape:", districts.keys())
|
print("Valid districts to scrape:", districts.keys())
|
||||||
listings = []
|
listings = []
|
||||||
for district, locid in districts.items():
|
|
||||||
print("#### District:", district)
|
|
||||||
for i in [1, 2]:
|
|
||||||
try:
|
|
||||||
response_json = listing_query(
|
|
||||||
page=i,
|
|
||||||
channel=parameters.listing_type,
|
|
||||||
min_bedrooms=parameters.min_bedrooms,
|
|
||||||
max_bedrooms=parameters.max_bedrooms,
|
|
||||||
radius=parameters.radius,
|
|
||||||
min_price=parameters.min_price,
|
|
||||||
max_price=parameters.max_price,
|
|
||||||
location_id=locid,
|
|
||||||
page_size=parameters.page_size,
|
|
||||||
max_days_since_added=parameters.max_days_since_added,
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
print(e)
|
|
||||||
break
|
|
||||||
if i == 1:
|
|
||||||
print("totalAvailableResults: ", response_json["totalAvailableResults"])
|
|
||||||
if len(response_json["properties"]) == 0:
|
|
||||||
break
|
|
||||||
print(f"page {i}", end=", ", flush=True)
|
|
||||||
|
|
||||||
for property in response_json["properties"]:
|
json_responses = await asyncio.gather(
|
||||||
identifier = property["identifier"]
|
*[
|
||||||
|
listing_query(
|
||||||
|
page=i,
|
||||||
|
channel=parameters.listing_type,
|
||||||
|
min_bedrooms=parameters.min_bedrooms,
|
||||||
|
max_bedrooms=parameters.max_bedrooms,
|
||||||
|
radius=parameters.radius,
|
||||||
|
min_price=parameters.min_price,
|
||||||
|
max_price=parameters.max_price,
|
||||||
|
location_id=locid,
|
||||||
|
page_size=parameters.page_size,
|
||||||
|
max_days_since_added=parameters.max_days_since_added,
|
||||||
|
) for locid in districts.values() for i in [1, 2]
|
||||||
|
]
|
||||||
|
)
|
||||||
|
listings = []
|
||||||
|
for response_json in json_responses:
|
||||||
|
if response_json["totalAvailableResults"] == 0:
|
||||||
|
print("No results found")
|
||||||
|
continue
|
||||||
|
if response_json["totalAvailableResults"] > 0:
|
||||||
|
print("totalAvailableResults: ", response_json["totalAvailableResults"])
|
||||||
|
for property in response_json["properties"]:
|
||||||
|
identifier = property["identifier"]
|
||||||
|
|
||||||
|
listing = Listing(identifier, data_dir=data_dir)
|
||||||
|
listing.dump_listing(property)
|
||||||
|
listings.append(listing)
|
||||||
|
|
||||||
listing = Listing(identifier, data_dir=data_dir)
|
|
||||||
listing.dump_listing(property)
|
|
||||||
listings.append(listing)
|
|
||||||
print() # break line as we used end=, above.
|
|
||||||
return listings
|
return listings
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
import asyncio
|
||||||
import pathlib
|
import pathlib
|
||||||
import click
|
import click
|
||||||
import importlib
|
import importlib
|
||||||
|
|
@ -107,12 +108,12 @@ def dump_listings(
|
||||||
f'{query_parameters}'
|
f'{query_parameters}'
|
||||||
)
|
)
|
||||||
data_dir_path = pathlib.Path(data_dir)
|
data_dir_path = pathlib.Path(data_dir)
|
||||||
dump_listings_module.dump_listings(query_parameters, data_dir_path)
|
asyncio.run(dump_listings_module.dump_listings(query_parameters, data_dir_path))
|
||||||
|
|
||||||
|
|
||||||
@cli.command()
|
@cli.command()
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
def dump_detail(ctx: click.core.Context):
|
def dump_details(ctx: click.core.Context):
|
||||||
data_dir = ctx.obj['data_dir']
|
data_dir = ctx.obj['data_dir']
|
||||||
click.echo(f'Running dump_detail for listings stored in {data_dir}')
|
click.echo(f'Running dump_detail for listings stored in {data_dir}')
|
||||||
listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json")))
|
listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json")))
|
||||||
|
|
|
||||||
|
|
@ -46,7 +46,7 @@ def detail_query(detail_id: int):
|
||||||
return response.json()
|
return response.json()
|
||||||
|
|
||||||
|
|
||||||
def listing_query(
|
async def listing_query(
|
||||||
page: int,
|
page: int,
|
||||||
channel: ListingType,
|
channel: ListingType,
|
||||||
min_bedrooms: int,
|
min_bedrooms: int,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue