"""Command-line entry point for the listing pipeline.

Defines a click command group with one sub-command per pipeline step
(dumping listings, details and images, floorplan detection, routing) plus a
CSV export command. All commands share the ``--data-dir`` group option.
"""
import asyncio
import importlib
import pathlib

import click

from rec.districts import get_districts
from data_access import Listing
import csv_exporter
from rec.query import ListingType

# The step modules have names starting with a digit, which cannot be used in a
# regular ``import`` statement, so they are loaded through importlib.
dump_listings_module = importlib.import_module('1_dump_listings')
dump_detail_module = importlib.import_module('2_dump_detail')
dump_images_module = importlib.import_module('3_dump_images')
detect_floorplan_module = importlib.import_module('4_detect_floorplan')
routing_module = importlib.import_module('5_routing')

# Maps a step name to the callable implementing that step.
steps_to_handlers = {
    'dump_listings': dump_listings_module.dump_listings,
    'dump_detail': dump_detail_module.dump_detail,
    'dump_images': dump_images_module.dump_images,
    'detect_floorplan': detect_floorplan_module.detect_floorplan,
    'routing': routing_module.calculate_route,
}


def _find_listing_paths(data_dir) -> list[pathlib.Path]:
    """Return the sorted ``listing.json`` paths one level below *data_dir*."""
    return sorted(pathlib.Path(data_dir).glob("*/listing.json"))


@click.group()
@click.option(
    '--data-dir',
    default=pathlib.Path("data/rs/"),
    help='Directory where listing data is stored',
    type=click.Path(
        writable=True,
        file_okay=False,
        dir_okay=True,
        resolve_path=True,
    ),
)
@click.pass_context
def cli(ctx, data_dir: str):
    """Run individual steps of the listing pipeline."""
    # Sub-commands read the data directory back from the shared context object.
    ctx.ensure_object(dict)
    ctx.obj['data_dir'] = data_dir


@cli.command()
@click.option(
    '--type',
    '-t',
    help='Type of listing to scrape',
    type=click.Choice(
        list(ListingType.__members__.keys()),
        case_sensitive=False,
    ),
    required=True,
)
@click.option(
    '--min-bedrooms',
    default=1,
    help='Minimum number of bedrooms',
    type=click.IntRange(min=1),
)
@click.option(
    '--max-bedrooms',
    default=5,
    help='Maximum number of bedrooms',
    type=click.IntRange(min=1),
)
@click.option(
    '--min-price',
    default=0,
    help='Minimum price',
    type=click.IntRange(min=0),
)
@click.option(
    '--max-price',
    default=1000000,
    help='Maximum price',
    type=click.IntRange(min=0),
)
@click.option(
    '--district',
    default=None,
    help='Districts to scrape',
    type=click.Choice(list(get_districts().keys()), case_sensitive=False),
    multiple=True,
)
@click.pass_context
def dump_listings(
    ctx: click.core.Context,
    district: tuple[str, ...],
    min_bedrooms: int,
    max_bedrooms: int,
    min_price: int,
    max_price: int,
    type: str,  # name is dictated by the ``--type`` option; shadows the builtin
):
    """Dump listings matching the given filters into the data directory."""
    # Reject inverted ranges up front instead of starting a run with them.
    if min_bedrooms > max_bedrooms:
        raise click.UsageError(
            f'--min-bedrooms ({min_bedrooms}) must not be greater than '
            f'--max-bedrooms ({max_bedrooms})',
            ctx=ctx,
        )
    if min_price > max_price:
        raise click.UsageError(
            f'--min-price ({min_price}) must not be greater than '
            f'--max-price ({max_price})',
            ctx=ctx,
        )

    data_dir: str = ctx.obj['data_dir']
    query_parameters = dump_listings_module.QueryParameters(
        listing_type=ListingType[type],
        district_names=set(district),
        min_bedrooms=min_bedrooms,
        max_bedrooms=max_bedrooms,
        min_price=min_price,
        max_price=max_price,
    )
    click.echo(
        f'Running dump_listings for districts {district}, data dir {data_dir} and parameters: '
        f'{query_parameters}'
    )
    data_dir_path = pathlib.Path(data_dir)
    asyncio.run(dump_listings_module.dump_listings(query_parameters, data_dir_path))


@cli.command()
@click.pass_context
def dump_details(ctx: click.core.Context):
    """Run the dump_detail step for every listing in the data directory."""
    data_dir = ctx.obj['data_dir']
    click.echo(f'Running dump_detail for listings stored in {data_dir}')
    asyncio.run(dump_detail_module.dump_detail(_find_listing_paths(data_dir)))


@cli.command()
@click.pass_context
def dump_images(ctx: click.core.Context):
    """Run the dump_images step for every listing in the data directory."""
    data_dir = ctx.obj['data_dir']
    click.echo(f'Running dump_images stored in {data_dir}')
    asyncio.run(dump_images_module.dump_images(_find_listing_paths(data_dir)))


@cli.command()
@click.pass_context
def detect_floorplan(ctx: click.core.Context):
    """Run the detect_floorplan step for every listing in the data directory."""
    data_dir = ctx.obj['data_dir']
    click.echo(f'Running detect_floorplan in {data_dir}')
    asyncio.run(
        detect_floorplan_module.detect_floorplan(_find_listing_paths(data_dir))
    )


@cli.command()
@click.pass_context
def routing(ctx: click.core.Context):
    """Run the routing step for every listing in the data directory."""
    data_dir = ctx.obj['data_dir']
    click.echo(f'Running routing for listings in {data_dir}')
    # Unlike the other steps, calculate_route is called synchronously.
    routing_module.calculate_route(_find_listing_paths(data_dir))


@cli.command()
@click.option(
    '--columns',
    '-C',
    help='Columns to include in the CSV file',
    type=click.Choice(
        Listing.ALL_COLUMNS,
        case_sensitive=False,
    ),
    multiple=True,
    default=Listing.ALL_COLUMNS,
)
@click.option(
    '--output-file',
    '-O',
    help='Path to the output CSV file',
    required=True,
    type=click.Path(
        writable=True,
        file_okay=True,
        dir_okay=False,
        resolve_path=True,
    ),
)
@click.pass_context
def export_csv(ctx: click.core.Context, output_file: str, columns: tuple[str, ...]):
    """Export the listings in the data directory to a CSV file."""
    data_dir = ctx.obj['data_dir']
    click.echo(f'Exporting data to {output_file} using {data_dir=}')
    output_file_path = pathlib.Path(output_file)
    listings = Listing.get_all_listings(_find_listing_paths(data_dir))
    csv_exporter.export_to_csv(listings, output_file_path, list(columns))


if __name__ == '__main__':
    cli()