Expose Rightmove query parameters as CLI options

This commit is contained in:
Viktor Barzin 2025-05-17 20:40:44 +00:00
parent 96562c0895
commit bb9afc76fe
No known key found for this signature in database
GPG key ID: 4056458DBDBF8863
2 changed files with 70 additions and 15 deletions

View file

@ -1,33 +1,47 @@
from dataclasses import dataclass
import pathlib import pathlib
from rec.query import listing_query from rec.query import listing_query
from rec.districts import get_districts from rec.districts import get_districts
from data_access import Listing from data_access import Listing
@dataclass(frozen=True)
class QueryParameters:
min_bedrooms: int
max_bedrooms: int
min_price: int
max_price: int
district_names: set[str]
radius: float = 0
page_size: int = 500 # items per page
max_days_since_added: int | None = None
def dump_listings( def dump_listings(
district_names: set[str] | None = None, parameters: QueryParameters,
data_dir: pathlib.Path = pathlib.Path("data/rs/") data_dir: pathlib.Path = pathlib.Path("data/rs/"),
): ) -> list[Listing]:
districts = get_districts() if district_names is None else { districts = {
district: locid district: locid
for district, locid in get_districts().items() for district, locid in get_districts().items()
if district in district_names if district in parameters.district_names
} }
print("Valid districts to scrape:", districts.keys()) print("Valid districts to scrape:", districts.keys())
listings = []
for district, locid in districts.items(): for district, locid in districts.items():
print("#### District:", district) print("#### District:", district)
for i in [1, 2]: for i in [1, 2]:
try: try:
d = listing_query( d = listing_query(
page=i, page=i,
min_bedrooms=1, min_bedrooms=parameters.min_bedrooms,
max_bedrooms=4, max_bedrooms=parameters.max_bedrooms,
radius=0, radius=parameters.radius,
min_price=0, min_price=parameters.min_price,
max_price=1000000, max_price=parameters.max_price,
location_id=locid, location_id=locid,
page_size=500, page_size=parameters.page_size,
max_days_since_added=None, max_days_since_added=parameters.max_days_since_added,
) )
except Exception as e: except Exception as e:
print(e) print(e)
@ -43,7 +57,9 @@ def dump_listings(
listing = Listing(identifier, data_dir=data_dir) listing = Listing(identifier, data_dir=data_dir)
listing.dump_listing(property) listing.dump_listing(property)
listings.append(listing)
print() # break line as we used end=, above. print() # break line as we used end=, above.
return listings
def main(): def main():

View file

@ -41,6 +41,30 @@ def cli(ctx, data_dir: str):
@cli.command() @cli.command()
@click.option(
'--min-bedrooms',
default=1,
help='Minimum number of bedrooms',
type=click.IntRange(min=1),
)
@click.option(
'--max-bedrooms',
default=5,
help='Maximum number of bedrooms',
type=click.IntRange(min=1),
)
@click.option(
'--min-price',
default=0,
help='Minimum price',
type=click.IntRange(min=0),
)
@click.option(
'--max-price',
default=1000000,
help='Maximum price',
type=click.IntRange(min=0),
)
@click.option( @click.option(
'--district', '--district',
default=None, default=None,
@ -49,13 +73,28 @@ def cli(ctx, data_dir: str):
multiple=True, multiple=True,
) )
@click.pass_context @click.pass_context
def dump_listings(ctx: click.core.Context, district: list[str]): def dump_listings(
ctx: click.core.Context,
district: list[str],
min_bedrooms: int,
max_bedrooms: int,
min_price: int,
max_price: int,
):
data_dir: str = ctx.obj['data_dir'] data_dir: str = ctx.obj['data_dir']
query_parameters = dump_listings_module.QueryParameters(
district_names=set(district),
min_bedrooms=min_bedrooms,
max_bedrooms=max_bedrooms,
min_price=min_price,
max_price=max_price,
)
click.echo( click.echo(
f'Running dump_listings for districts {district} and data dir {data_dir}' f'Running dump_listings for districts {district}, data dir {data_dir} and parameters: '
f'{query_parameters}'
) )
data_dir_path = pathlib.Path(data_dir) data_dir_path = pathlib.Path(data_dir)
dump_listings_module.dump_listings(set(district), data_dir_path) dump_listings_module.dump_listings(query_parameters, data_dir_path)
@cli.command() @cli.command()