"""Command-line entry point exposing each pipeline step as a click command."""
import importlib
import pathlib

import click

from rec.districts import get_districts
from data_access import Listing
import csv_exporter

# The step modules have names starting with a digit, which is not a valid
# identifier for a regular ``import`` statement, so they are loaded by name.
dump_listings_module = importlib.import_module('1_dump_listings')
dump_detail_module = importlib.import_module('2_dump_detail')
dump_images_module = importlib.import_module('3_dump_images')
detect_floorplan_module = importlib.import_module('4_detect_floorplan')
routing_module = importlib.import_module('5_routing')

# Step name -> callable implementing that step.
steps_to_handlers = {
    'dump_listings': dump_listings_module.dump_listings,
    'dump_detail': dump_detail_module.dump_detail,
    'dump_images': dump_images_module.dump_images,
    'detect_floorplan': detect_floorplan_module.detect_floorplan,
    'routing': routing_module.calculate_route,
}


def _find_listing_paths(data_dir: str) -> list[pathlib.Path]:
    """Return the sorted ``listing.json`` paths one level below *data_dir*."""
    # ``sorted`` already consumes the glob generator and returns a list.
    return sorted(pathlib.Path(data_dir).glob("*/listing.json"))


@click.group()
@click.option(
    '--data-dir',
    default=pathlib.Path("data/rs/"),
    help='Directory where listing data is stored',
    type=click.Path(
        writable=True,
        file_okay=False,
        dir_okay=True,
        resolve_path=True,
    ),
)
@click.pass_context
def cli(ctx, data_dir: str):
    """Run individual steps of the listing pipeline."""
    # Share the data directory with every subcommand through the context.
    ctx.ensure_object(dict)
    ctx.obj['data_dir'] = data_dir


@cli.command()
@click.option(
    '--district',
    default=None,
    help='Districts to scrape',
    # Materialise the keys view: click.Choice is documented to take a sequence.
    type=click.Choice(list(get_districts().keys()), case_sensitive=False),
    multiple=True,
)
@click.pass_context
def dump_listings(ctx: click.core.Context, district: tuple[str, ...]):
    """Run the dump_listings step for the selected districts."""
    data_dir: str = ctx.obj['data_dir']
    click.echo(
        f'Running dump_listings for districts {district} and data dir {data_dir}'
    )
    data_dir_path = pathlib.Path(data_dir)
    # ``multiple=True`` yields a tuple; the step receives the districts as a set.
    dump_listings_module.dump_listings(set(district), data_dir_path)


@cli.command()
@click.pass_context
def dump_detail(ctx: click.core.Context):
    """Run the dump_detail step for every listing in the data directory."""
    data_dir = ctx.obj['data_dir']
    click.echo(f'Running dump_detail for listings stored in {data_dir}')
    dump_detail_module.dump_detail(_find_listing_paths(data_dir))


@cli.command()
@click.pass_context
def dump_images(ctx: click.core.Context):
    """Run the dump_images step for every listing in the data directory."""
    data_dir = ctx.obj['data_dir']
    click.echo(f'Running dump_images stored in {data_dir}')
    dump_images_module.dump_images(_find_listing_paths(data_dir))


@cli.command()
@click.pass_context
def detect_floorplan(ctx: click.core.Context):
    """Run the detect_floorplan step for every listing in the data directory."""
    data_dir = ctx.obj['data_dir']
    click.echo(f'Running detect_floorplan in {data_dir}')
    detect_floorplan_module.detect_floorplan(_find_listing_paths(data_dir))


@cli.command()
@click.pass_context
def routing(ctx: click.core.Context):
    """Run the routing step for every listing in the data directory."""
    data_dir = ctx.obj['data_dir']
    click.echo(f'Running routing for listings in {data_dir}')
    routing_module.calculate_route(_find_listing_paths(data_dir))


@cli.command()
@click.option(
    '--columns',
    '-C',
    help='Columns to include in the CSV file',
    type=click.Choice(
        Listing.ALL_COLUMNS,
        case_sensitive=False,
    ),
    multiple=True,
    default=Listing.ALL_COLUMNS,
)
@click.option(
    '--output-file',
    '-O',
    help='Path to the output CSV file',
    required=True,
    type=click.Path(
        writable=True,
        file_okay=True,
        dir_okay=False,
        resolve_path=True,
    ),
)
@click.pass_context
def export_csv(
    ctx: click.core.Context,
    output_file: str,
    columns: tuple[str, ...],
):
    """Export the listings in the data directory to a CSV file."""
    data_dir = ctx.obj['data_dir']
    click.echo(f'Exporting data to {output_file} using {data_dir=}')
    output_file_path = pathlib.Path(output_file)
    listings = Listing.get_all_listings(_find_listing_paths(data_dir))
    csv_exporter.export_to_csv(listings, output_file_path, list(columns))


if __name__ == '__main__':
    cli()