Expose Rightmove query parameters as CLI options
This commit is contained in:
parent
96562c0895
commit
bb9afc76fe
2 changed files with 70 additions and 15 deletions
|
|
@ -1,33 +1,47 @@
|
||||||
|
from dataclasses import dataclass
|
||||||
import pathlib
|
import pathlib
|
||||||
from rec.query import listing_query
|
from rec.query import listing_query
|
||||||
from rec.districts import get_districts
|
from rec.districts import get_districts
|
||||||
from data_access import Listing
|
from data_access import Listing
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class QueryParameters:
|
||||||
|
min_bedrooms: int
|
||||||
|
max_bedrooms: int
|
||||||
|
min_price: int
|
||||||
|
max_price: int
|
||||||
|
district_names: set[str]
|
||||||
|
radius: float = 0
|
||||||
|
page_size: int = 500 # items per page
|
||||||
|
max_days_since_added: int | None = None
|
||||||
|
|
||||||
|
|
||||||
def dump_listings(
|
def dump_listings(
|
||||||
district_names: set[str] | None = None,
|
parameters: QueryParameters,
|
||||||
data_dir: pathlib.Path = pathlib.Path("data/rs/")
|
data_dir: pathlib.Path = pathlib.Path("data/rs/"),
|
||||||
):
|
) -> list[Listing]:
|
||||||
districts = get_districts() if district_names is None else {
|
districts = {
|
||||||
district: locid
|
district: locid
|
||||||
for district, locid in get_districts().items()
|
for district, locid in get_districts().items()
|
||||||
if district in district_names
|
if district in parameters.district_names
|
||||||
}
|
}
|
||||||
print("Valid districts to scrape:", districts.keys())
|
print("Valid districts to scrape:", districts.keys())
|
||||||
|
listings = []
|
||||||
for district, locid in districts.items():
|
for district, locid in districts.items():
|
||||||
print("#### District:", district)
|
print("#### District:", district)
|
||||||
for i in [1, 2]:
|
for i in [1, 2]:
|
||||||
try:
|
try:
|
||||||
d = listing_query(
|
d = listing_query(
|
||||||
page=i,
|
page=i,
|
||||||
min_bedrooms=1,
|
min_bedrooms=parameters.min_bedrooms,
|
||||||
max_bedrooms=4,
|
max_bedrooms=parameters.max_bedrooms,
|
||||||
radius=0,
|
radius=parameters.radius,
|
||||||
min_price=0,
|
min_price=parameters.min_price,
|
||||||
max_price=1000000,
|
max_price=parameters.max_price,
|
||||||
location_id=locid,
|
location_id=locid,
|
||||||
page_size=500,
|
page_size=parameters.page_size,
|
||||||
max_days_since_added=None,
|
max_days_since_added=parameters.max_days_since_added,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print(e)
|
||||||
|
|
@ -43,7 +57,9 @@ def dump_listings(
|
||||||
|
|
||||||
listing = Listing(identifier, data_dir=data_dir)
|
listing = Listing(identifier, data_dir=data_dir)
|
||||||
listing.dump_listing(property)
|
listing.dump_listing(property)
|
||||||
|
listings.append(listing)
|
||||||
print() # break line as we used end=, above.
|
print() # break line as we used end=, above.
|
||||||
|
return listings
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,30 @@ def cli(ctx, data_dir: str):
|
||||||
|
|
||||||
|
|
||||||
@cli.command()
|
@cli.command()
|
||||||
|
@click.option(
|
||||||
|
'--min-bedrooms',
|
||||||
|
default=1,
|
||||||
|
help='Minimum number of bedrooms',
|
||||||
|
type=click.IntRange(min=1),
|
||||||
|
)
|
||||||
|
@click.option(
|
||||||
|
'--max-bedrooms',
|
||||||
|
default=5,
|
||||||
|
help='Maximum number of bedrooms',
|
||||||
|
type=click.IntRange(min=1),
|
||||||
|
)
|
||||||
|
@click.option(
|
||||||
|
'--min-price',
|
||||||
|
default=0,
|
||||||
|
help='Minimum price',
|
||||||
|
type=click.IntRange(min=0),
|
||||||
|
)
|
||||||
|
@click.option(
|
||||||
|
'--max-price',
|
||||||
|
default=1000000,
|
||||||
|
help='Maximum price',
|
||||||
|
type=click.IntRange(min=0),
|
||||||
|
)
|
||||||
@click.option(
|
@click.option(
|
||||||
'--district',
|
'--district',
|
||||||
default=None,
|
default=None,
|
||||||
|
|
@ -49,13 +73,28 @@ def cli(ctx, data_dir: str):
|
||||||
multiple=True,
|
multiple=True,
|
||||||
)
|
)
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
def dump_listings(ctx: click.core.Context, district: list[str]):
|
def dump_listings(
|
||||||
|
ctx: click.core.Context,
|
||||||
|
district: list[str],
|
||||||
|
min_bedrooms: int,
|
||||||
|
max_bedrooms: int,
|
||||||
|
min_price: int,
|
||||||
|
max_price: int,
|
||||||
|
):
|
||||||
data_dir: str = ctx.obj['data_dir']
|
data_dir: str = ctx.obj['data_dir']
|
||||||
|
query_parameters = dump_listings_module.QueryParameters(
|
||||||
|
district_names=set(district),
|
||||||
|
min_bedrooms=min_bedrooms,
|
||||||
|
max_bedrooms=max_bedrooms,
|
||||||
|
min_price=min_price,
|
||||||
|
max_price=max_price,
|
||||||
|
)
|
||||||
click.echo(
|
click.echo(
|
||||||
f'Running dump_listings for districts {district} and data dir {data_dir}'
|
f'Running dump_listings for districts {district}, data dir {data_dir} and parameters: '
|
||||||
|
f'{query_parameters}'
|
||||||
)
|
)
|
||||||
data_dir_path = pathlib.Path(data_dir)
|
data_dir_path = pathlib.Path(data_dir)
|
||||||
dump_listings_module.dump_listings(set(district), data_dir_path)
|
dump_listings_module.dump_listings(query_parameters, data_dir_path)
|
||||||
|
|
||||||
|
|
||||||
@cli.command()
|
@cli.command()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue