reformat with black; looks better
This commit is contained in:
parent
1122f5a96f
commit
0b9d50af47
11 changed files with 240 additions and 244 deletions
148
crawler/main.py
148
crawler/main.py
|
|
@ -12,18 +12,18 @@ from rec.query import ListingType, FurnishType
|
|||
from rec.routing import API_KEY_ENVIRONMENT_VARIABLE, TravelMode
|
||||
from ui_exporter import export_immoweb as export_immoweb_ui
|
||||
|
||||
dump_listings_module = importlib.import_module('1_dump_listings')
|
||||
dump_detail_module = importlib.import_module('2_dump_detail')
|
||||
dump_images_module = importlib.import_module('3_dump_images')
|
||||
detect_floorplan_module = importlib.import_module('4_detect_floorplan')
|
||||
routing_module = importlib.import_module('5_routing')
|
||||
dump_listings_module = importlib.import_module("1_dump_listings")
|
||||
dump_detail_module = importlib.import_module("2_dump_detail")
|
||||
dump_images_module = importlib.import_module("3_dump_images")
|
||||
detect_floorplan_module = importlib.import_module("4_detect_floorplan")
|
||||
routing_module = importlib.import_module("5_routing")
|
||||
|
||||
|
||||
@click.group()
|
||||
@click.option(
|
||||
'--data-dir',
|
||||
"--data-dir",
|
||||
default=pathlib.Path("data/rs/"),
|
||||
help='Districts to scrape',
|
||||
help="Districts to scrape",
|
||||
type=click.Path(
|
||||
writable=True,
|
||||
file_okay=False,
|
||||
|
|
@ -34,15 +34,15 @@ routing_module = importlib.import_module('5_routing')
|
|||
@click.pass_context
|
||||
def cli(ctx, data_dir: str):
|
||||
ctx.ensure_object(dict)
|
||||
ctx.obj['data_dir'] = data_dir
|
||||
ctx.obj["data_dir"] = data_dir
|
||||
pass
|
||||
|
||||
|
||||
@cli.command()
|
||||
@click.option(
|
||||
'--type',
|
||||
'-t',
|
||||
help='Type of listing to scrape',
|
||||
"--type",
|
||||
"-t",
|
||||
help="Type of listing to scrape",
|
||||
type=click.Choice(
|
||||
ListingType.__members__.keys(),
|
||||
case_sensitive=False,
|
||||
|
|
@ -50,45 +50,42 @@ def cli(ctx, data_dir: str):
|
|||
required=True,
|
||||
)
|
||||
@click.option(
|
||||
'--min-bedrooms',
|
||||
"--min-bedrooms",
|
||||
default=1,
|
||||
help='Minimum number of bedrooms',
|
||||
help="Minimum number of bedrooms",
|
||||
type=click.IntRange(min=1),
|
||||
)
|
||||
@click.option(
|
||||
'--max-bedrooms',
|
||||
"--max-bedrooms",
|
||||
default=5,
|
||||
help='Maximum number of bedrooms',
|
||||
help="Maximum number of bedrooms",
|
||||
type=click.IntRange(min=1),
|
||||
)
|
||||
@click.option(
|
||||
'--min-price',
|
||||
"--min-price",
|
||||
default=0,
|
||||
help='Minimum price',
|
||||
help="Minimum price",
|
||||
type=click.IntRange(min=0),
|
||||
)
|
||||
@click.option(
|
||||
'--max-price',
|
||||
"--max-price",
|
||||
default=1000000,
|
||||
help='Maximum price',
|
||||
help="Maximum price",
|
||||
type=click.IntRange(min=0),
|
||||
)
|
||||
@click.option(
|
||||
'--district',
|
||||
"--district",
|
||||
default=None,
|
||||
help='Districts to scrape',
|
||||
help="Districts to scrape",
|
||||
type=click.Choice(get_districts().keys(), case_sensitive=False),
|
||||
multiple=True,
|
||||
)
|
||||
@click.option(
|
||||
'--furnish-types',
|
||||
'-f',
|
||||
help='Furnish types for rented listings',
|
||||
"--furnish-types",
|
||||
"-f",
|
||||
help="Furnish types for rented listings",
|
||||
type=click.Choice(
|
||||
[
|
||||
furnish_type.name
|
||||
for furnish_type in FurnishType.__members__.values()
|
||||
],
|
||||
[furnish_type.name for furnish_type in FurnishType.__members__.values()],
|
||||
case_sensitive=False,
|
||||
),
|
||||
multiple=True,
|
||||
|
|
@ -104,7 +101,7 @@ def dump_listings(
|
|||
type: str,
|
||||
furnish_types: list[str],
|
||||
):
|
||||
data_dir: str = ctx.obj['data_dir']
|
||||
data_dir: str = ctx.obj["data_dir"]
|
||||
query_parameters = dump_listings_module.QueryParameters(
|
||||
listing_type=ListingType[type],
|
||||
district_names=set(district),
|
||||
|
|
@ -112,23 +109,21 @@ def dump_listings(
|
|||
max_bedrooms=max_bedrooms,
|
||||
min_price=min_price,
|
||||
max_price=max_price,
|
||||
furnish_types=[
|
||||
FurnishType[furnish_type] for furnish_type in furnish_types
|
||||
],
|
||||
furnish_types=[FurnishType[furnish_type] for furnish_type in furnish_types],
|
||||
)
|
||||
click.echo(
|
||||
f'Running dump_listings for districts {district}, data dir {data_dir} and parameters: '
|
||||
f'{query_parameters}')
|
||||
f"Running dump_listings for districts {district}, data dir {data_dir} and parameters: "
|
||||
f"{query_parameters}"
|
||||
)
|
||||
data_dir_path = pathlib.Path(data_dir)
|
||||
asyncio.run(
|
||||
dump_listings_module.dump_listings(query_parameters, data_dir_path))
|
||||
asyncio.run(dump_listings_module.dump_listings(query_parameters, data_dir_path))
|
||||
|
||||
|
||||
@cli.command()
|
||||
@click.pass_context
|
||||
def dump_details(ctx: click.core.Context):
|
||||
data_dir = ctx.obj['data_dir']
|
||||
click.echo(f'Running dump_detail for listings stored in {data_dir}')
|
||||
data_dir = ctx.obj["data_dir"]
|
||||
click.echo(f"Running dump_detail for listings stored in {data_dir}")
|
||||
listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json")))
|
||||
asyncio.run(dump_detail_module.dump_detail(listing_paths))
|
||||
|
||||
|
|
@ -136,8 +131,8 @@ def dump_details(ctx: click.core.Context):
|
|||
@cli.command()
|
||||
@click.pass_context
|
||||
def dump_images(ctx: click.core.Context):
|
||||
data_dir = ctx.obj['data_dir']
|
||||
click.echo(f'Running dump_images stored in {data_dir}')
|
||||
data_dir = ctx.obj["data_dir"]
|
||||
click.echo(f"Running dump_images stored in {data_dir}")
|
||||
listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json")))
|
||||
asyncio.run(dump_images_module.dump_images(listing_paths))
|
||||
|
||||
|
|
@ -145,24 +140,24 @@ def dump_images(ctx: click.core.Context):
|
|||
@cli.command()
|
||||
@click.pass_context
|
||||
def detect_floorplan(ctx: click.core.Context):
|
||||
data_dir = ctx.obj['data_dir']
|
||||
click.echo(f'Running detect_floorplan in {data_dir}')
|
||||
data_dir = ctx.obj["data_dir"]
|
||||
click.echo(f"Running detect_floorplan in {data_dir}")
|
||||
listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json")))
|
||||
asyncio.run(detect_floorplan_module.detect_floorplan(listing_paths))
|
||||
|
||||
|
||||
@cli.command()
|
||||
@click.option(
|
||||
'--destination-address',
|
||||
'-d',
|
||||
help='Destination address for routing',
|
||||
"--destination-address",
|
||||
"-d",
|
||||
help="Destination address for routing",
|
||||
required=True,
|
||||
type=click.STRING,
|
||||
)
|
||||
@click.option(
|
||||
'--travel-mode',
|
||||
'-m',
|
||||
help='Travel mode for routing',
|
||||
"--travel-mode",
|
||||
"-m",
|
||||
help="Travel mode for routing",
|
||||
type=click.Choice(
|
||||
TravelMode.__members__.keys(),
|
||||
case_sensitive=False,
|
||||
|
|
@ -170,23 +165,25 @@ def detect_floorplan(ctx: click.core.Context):
|
|||
required=True,
|
||||
)
|
||||
@click.option(
|
||||
'--limit',
|
||||
'-l',
|
||||
help='Limit the number of listings to process',
|
||||
"--limit",
|
||||
"-l",
|
||||
help="Limit the number of listings to process",
|
||||
type=click.IntRange(min=1),
|
||||
default=1, # by default limit to 1 to avoid accidental API usage
|
||||
)
|
||||
@click.pass_context
|
||||
def routing(ctx: click.core.Context, destination_address: str,
|
||||
travel_mode: str, limit: int):
|
||||
data_dir = ctx.obj['data_dir']
|
||||
click.echo(f'Running routing for the first {limit} listings in {data_dir}')
|
||||
def routing(
|
||||
ctx: click.core.Context, destination_address: str, travel_mode: str, limit: int
|
||||
):
|
||||
data_dir = ctx.obj["data_dir"]
|
||||
click.echo(f"Running routing for the first {limit} listings in {data_dir}")
|
||||
listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json")))
|
||||
listing_paths = listing_paths[:limit]
|
||||
if os.environ.get(API_KEY_ENVIRONMENT_VARIABLE) is None:
|
||||
raise click.exceptions.MissingParameter(
|
||||
f'{API_KEY_ENVIRONMENT_VARIABLE} environment variable is not set. '
|
||||
'Please set it to your API key for the routing service.')
|
||||
f"{API_KEY_ENVIRONMENT_VARIABLE} environment variable is not set. "
|
||||
"Please set it to your API key for the routing service."
|
||||
)
|
||||
|
||||
asyncio.run(
|
||||
routing_module.calculate_route(
|
||||
|
|
@ -194,14 +191,15 @@ def routing(ctx: click.core.Context, destination_address: str,
|
|||
destination_address,
|
||||
# destination_address_coordinates,
|
||||
TravelMode[travel_mode],
|
||||
))
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@cli.command()
|
||||
@click.option(
|
||||
'--columns',
|
||||
'-C',
|
||||
help='Columns to include in the CSV file',
|
||||
"--columns",
|
||||
"-C",
|
||||
help="Columns to include in the CSV file",
|
||||
type=click.Choice(
|
||||
Listing.ALL_COLUMNS,
|
||||
case_sensitive=False,
|
||||
|
|
@ -210,9 +208,9 @@ def routing(ctx: click.core.Context, destination_address: str,
|
|||
default=Listing.ALL_COLUMNS,
|
||||
)
|
||||
@click.option(
|
||||
'--output-file',
|
||||
'-O',
|
||||
help='Path to the output CSV file',
|
||||
"--output-file",
|
||||
"-O",
|
||||
help="Path to the output CSV file",
|
||||
required=True,
|
||||
type=click.Path(
|
||||
writable=True,
|
||||
|
|
@ -223,20 +221,21 @@ def routing(ctx: click.core.Context, destination_address: str,
|
|||
)
|
||||
@click.pass_context
|
||||
def export_csv(ctx: click.core.Context, output_file: str, columns: tuple[str]):
|
||||
data_dir = ctx.obj['data_dir']
|
||||
click.echo(f'Exporting data to {output_file} using {data_dir=}')
|
||||
data_dir = ctx.obj["data_dir"]
|
||||
click.echo(f"Exporting data to {output_file} using {data_dir=}")
|
||||
output_file_path = pathlib.Path(output_file)
|
||||
listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json")))
|
||||
listings = Listing.get_all_listings([str(path) for path in listing_paths])
|
||||
asyncio.run(
|
||||
csv_exporter.export_to_csv(listings, output_file_path,
|
||||
list(columns)), )
|
||||
|
||||
csv_exporter.export_to_csv(listings, output_file_path, list(columns)),
|
||||
)
|
||||
|
||||
|
||||
@cli.command()
|
||||
@click.option(
|
||||
'--output-file',
|
||||
'-O',
|
||||
help='Path to the output immoweb file',
|
||||
"--output-file",
|
||||
"-O",
|
||||
help="Path to the output immoweb file",
|
||||
required=True,
|
||||
type=click.Path(
|
||||
writable=True,
|
||||
|
|
@ -247,10 +246,9 @@ def export_csv(ctx: click.core.Context, output_file: str, columns: tuple[str]):
|
|||
)
|
||||
@click.pass_context
|
||||
def export_immoweb(ctx, output_file: str):
|
||||
click.echo(f'Exporting data to {output_file}')
|
||||
click.echo(f"Exporting data to {output_file}")
|
||||
asyncio.run(export_immoweb_ui(ctx, output_file))
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
cli()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue