Expose Rightmove query parameters as CLI options
This commit is contained in:
parent
96562c0895
commit
bb9afc76fe
2 changed files with 70 additions and 15 deletions
|
|
@ -1,33 +1,47 @@
|
|||
from dataclasses import dataclass
|
||||
import pathlib
|
||||
from rec.query import listing_query
|
||||
from rec.districts import get_districts
|
||||
from data_access import Listing
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class QueryParameters:
|
||||
min_bedrooms: int
|
||||
max_bedrooms: int
|
||||
min_price: int
|
||||
max_price: int
|
||||
district_names: set[str]
|
||||
radius: float = 0
|
||||
page_size: int = 500 # items per page
|
||||
max_days_since_added: int | None = None
|
||||
|
||||
|
||||
def dump_listings(
|
||||
district_names: set[str] | None = None,
|
||||
data_dir: pathlib.Path = pathlib.Path("data/rs/")
|
||||
):
|
||||
districts = get_districts() if district_names is None else {
|
||||
parameters: QueryParameters,
|
||||
data_dir: pathlib.Path = pathlib.Path("data/rs/"),
|
||||
) -> list[Listing]:
|
||||
districts = {
|
||||
district: locid
|
||||
for district, locid in get_districts().items()
|
||||
if district in district_names
|
||||
if district in parameters.district_names
|
||||
}
|
||||
print("Valid districts to scrape:", districts.keys())
|
||||
listings = []
|
||||
for district, locid in districts.items():
|
||||
print("#### District:", district)
|
||||
for i in [1, 2]:
|
||||
try:
|
||||
d = listing_query(
|
||||
page=i,
|
||||
min_bedrooms=1,
|
||||
max_bedrooms=4,
|
||||
radius=0,
|
||||
min_price=0,
|
||||
max_price=1000000,
|
||||
min_bedrooms=parameters.min_bedrooms,
|
||||
max_bedrooms=parameters.max_bedrooms,
|
||||
radius=parameters.radius,
|
||||
min_price=parameters.min_price,
|
||||
max_price=parameters.max_price,
|
||||
location_id=locid,
|
||||
page_size=500,
|
||||
max_days_since_added=None,
|
||||
page_size=parameters.page_size,
|
||||
max_days_since_added=parameters.max_days_since_added,
|
||||
)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
|
@ -43,7 +57,9 @@ def dump_listings(
|
|||
|
||||
listing = Listing(identifier, data_dir=data_dir)
|
||||
listing.dump_listing(property)
|
||||
listings.append(listing)
|
||||
print() # break line as we used end=, above.
|
||||
return listings
|
||||
|
||||
|
||||
def main():
|
||||
|
|
|
|||
|
|
@ -41,6 +41,30 @@ def cli(ctx, data_dir: str):
|
|||
|
||||
|
||||
@cli.command()
|
||||
@click.option(
|
||||
'--min-bedrooms',
|
||||
default=1,
|
||||
help='Minimum number of bedrooms',
|
||||
type=click.IntRange(min=1),
|
||||
)
|
||||
@click.option(
|
||||
'--max-bedrooms',
|
||||
default=5,
|
||||
help='Maximum number of bedrooms',
|
||||
type=click.IntRange(min=1),
|
||||
)
|
||||
@click.option(
|
||||
'--min-price',
|
||||
default=0,
|
||||
help='Minimum price',
|
||||
type=click.IntRange(min=0),
|
||||
)
|
||||
@click.option(
|
||||
'--max-price',
|
||||
default=1000000,
|
||||
help='Maximum price',
|
||||
type=click.IntRange(min=0),
|
||||
)
|
||||
@click.option(
|
||||
'--district',
|
||||
default=None,
|
||||
|
|
@ -49,13 +73,28 @@ def cli(ctx, data_dir: str):
|
|||
multiple=True,
|
||||
)
|
||||
@click.pass_context
|
||||
def dump_listings(ctx: click.core.Context, district: list[str]):
|
||||
def dump_listings(
|
||||
ctx: click.core.Context,
|
||||
district: list[str],
|
||||
min_bedrooms: int,
|
||||
max_bedrooms: int,
|
||||
min_price: int,
|
||||
max_price: int,
|
||||
):
|
||||
data_dir: str = ctx.obj['data_dir']
|
||||
query_parameters = dump_listings_module.QueryParameters(
|
||||
district_names=set(district),
|
||||
min_bedrooms=min_bedrooms,
|
||||
max_bedrooms=max_bedrooms,
|
||||
min_price=min_price,
|
||||
max_price=max_price,
|
||||
)
|
||||
click.echo(
|
||||
f'Running dump_listings for districts {district} and data dir {data_dir}'
|
||||
f'Running dump_listings for districts {district}, data dir {data_dir} and parameters: '
|
||||
f'{query_parameters}'
|
||||
)
|
||||
data_dir_path = pathlib.Path(data_dir)
|
||||
dump_listings_module.dump_listings(set(district), data_dir_path)
|
||||
dump_listings_module.dump_listings(query_parameters, data_dir_path)
|
||||
|
||||
|
||||
@cli.command()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue