"""Command line entry point for the listing scraping pipeline.

Every sub-command delegates to one of the numbered pipeline modules
(``1_dump_listings`` ... ``5_routing``) or to the CSV exporter. The numbered
modules cannot be imported with a regular ``import`` statement because their
names start with a digit, hence the ``importlib.import_module`` calls below.
"""
import asyncio
import importlib
import json
import os
import pathlib

import click

from rec.districts import get_districts
from data_access import Listing
import csv_exporter
from rec.query import ListingType, FurnishType
from rec.routing import API_KEY_ENVIRONMENT_VARIABLE, TravelMode

dump_listings_module = importlib.import_module('1_dump_listings')
dump_detail_module = importlib.import_module('2_dump_detail')
dump_images_module = importlib.import_module('3_dump_images')
detect_floorplan_module = importlib.import_module('4_detect_floorplan')
routing_module = importlib.import_module('5_routing')

# Glob pattern, relative to the data directory, matching one file per listing.
_LISTING_GLOB = '*/listing.json'


def _find_listing_paths(data_dir: str) -> list[pathlib.Path]:
    """Return the sorted ``listing.json`` paths found one level below *data_dir*."""
    return sorted(pathlib.Path(data_dir).glob(_LISTING_GLOB))


@click.group()
@click.option(
    '--data-dir',
    default=pathlib.Path("data/rs/"),
    help='Directory where the scraped listing data is stored',
    type=click.Path(
        writable=True,
        file_okay=False,
        dir_okay=True,
        resolve_path=True,
    ),
)
@click.pass_context
def cli(ctx, data_dir: str):
    """Scrape, enrich and export property listings."""
    # Share the data directory with every sub-command through the context.
    ctx.ensure_object(dict)
    ctx.obj['data_dir'] = data_dir


@cli.command()
@click.option(
    '--type',
    '-t',
    help='Type of listing to scrape',
    type=click.Choice(
        list(ListingType.__members__),
        case_sensitive=False,
    ),
    required=True,
)
@click.option(
    '--min-bedrooms',
    default=1,
    help='Minimum number of bedrooms',
    type=click.IntRange(min=1),
)
@click.option(
    '--max-bedrooms',
    default=5,
    help='Maximum number of bedrooms',
    type=click.IntRange(min=1),
)
@click.option(
    '--min-price',
    default=0,
    help='Minimum price',
    type=click.IntRange(min=0),
)
@click.option(
    '--max-price',
    default=1000000,
    help='Maximum price',
    type=click.IntRange(min=0),
)
@click.option(
    '--district',
    default=None,
    help='Districts to scrape',
    type=click.Choice(list(get_districts().keys()), case_sensitive=False),
    multiple=True,
)
@click.option(
    '--furnish-types',
    '-f',
    help='Furnish types for rented listings',
    type=click.Choice(
        [
            furnish_type.name
            for furnish_type in FurnishType.__members__.values()
        ],
        case_sensitive=False,
    ),
    multiple=True,
)
@click.pass_context
def dump_listings(
    ctx: click.core.Context,
    district: tuple[str, ...],
    min_bedrooms: int,
    max_bedrooms: int,
    min_price: int,
    max_price: int,
    type: str,  # name is dictated by the ``--type`` option
    furnish_types: tuple[str, ...],
):
    """Query listings matching the given filters and dump them to the data dir."""
    # Reject inverted ranges up front instead of sending a query that cannot
    # match anything.
    if min_bedrooms > max_bedrooms:
        raise click.UsageError(
            f'--min-bedrooms ({min_bedrooms}) must not be greater than '
            f'--max-bedrooms ({max_bedrooms}).',
            ctx=ctx,
        )
    if min_price > max_price:
        raise click.UsageError(
            f'--min-price ({min_price}) must not be greater than '
            f'--max-price ({max_price}).',
            ctx=ctx,
        )

    data_dir: str = ctx.obj['data_dir']
    query_parameters = dump_listings_module.QueryParameters(
        listing_type=ListingType[type],
        district_names=set(district),
        min_bedrooms=min_bedrooms,
        max_bedrooms=max_bedrooms,
        min_price=min_price,
        max_price=max_price,
        furnish_types=[
            FurnishType[furnish_type] for furnish_type in furnish_types
        ],
    )
    click.echo(
        f'Running dump_listings for districts {district}, data dir {data_dir} and parameters: '
        f'{query_parameters}')
    data_dir_path = pathlib.Path(data_dir)
    asyncio.run(
        dump_listings_module.dump_listings(query_parameters, data_dir_path))


@cli.command()
@click.pass_context
def dump_details(ctx: click.core.Context):
    """Run the detail dump step for every listing in the data dir."""
    data_dir = ctx.obj['data_dir']
    click.echo(f'Running dump_detail for listings stored in {data_dir}')
    listing_paths = _find_listing_paths(data_dir)
    asyncio.run(dump_detail_module.dump_detail(listing_paths))


@cli.command()
@click.pass_context
def dump_images(ctx: click.core.Context):
    """Run the image dump step for every listing in the data dir."""
    data_dir = ctx.obj['data_dir']
    click.echo(f'Running dump_images stored in {data_dir}')
    listing_paths = _find_listing_paths(data_dir)
    asyncio.run(dump_images_module.dump_images(listing_paths))


@cli.command()
@click.pass_context
def detect_floorplan(ctx: click.core.Context):
    """Run the floorplan detection step for every listing in the data dir."""
    data_dir = ctx.obj['data_dir']
    click.echo(f'Running detect_floorplan in {data_dir}')
    listing_paths = _find_listing_paths(data_dir)
    asyncio.run(detect_floorplan_module.detect_floorplan(listing_paths))


@cli.command()
@click.option(
    '--destination-address',
    '-d',
    help='Destination address for routing',
    required=True,
    type=click.STRING,
)
@click.option(
    '--travel-mode',
    '-m',
    help='Travel mode for routing',
    type=click.Choice(
        list(TravelMode.__members__),
        case_sensitive=False,
    ),
    required=True,
)
@click.option(
    '--limit',
    '-l',
    help='Limit the number of listings to process',
    type=click.IntRange(min=1),
    default=1,  # by default limit to 1 to avoid accidental API usage
)
@click.pass_context
def routing(ctx: click.core.Context, destination_address: str,
            travel_mode: str, limit: int):
    """Calculate routes from the first listings to a destination address."""
    data_dir = ctx.obj['data_dir']
    click.echo(f'Running routing for the first {limit} listings in {data_dir}')
    listing_paths = _find_listing_paths(data_dir)[:limit]

    # The routing step needs an API key; fail with a clear message when the
    # environment variable holding it is not set.
    if os.environ.get(API_KEY_ENVIRONMENT_VARIABLE) is None:
        raise click.exceptions.MissingParameter(
            f'{API_KEY_ENVIRONMENT_VARIABLE} environment variable is not set. '
            'Please set it to your API key for the routing service.')

    asyncio.run(
        routing_module.calculate_route(
            listing_paths,
            destination_address,
            TravelMode[travel_mode],
        ))


@cli.command()
@click.option(
    '--columns',
    '-C',
    help='Columns to include in the CSV file',
    type=click.Choice(
        Listing.ALL_COLUMNS,
        case_sensitive=False,
    ),
    multiple=True,
    default=Listing.ALL_COLUMNS,
)
@click.option(
    '--output-file',
    '-O',
    help='Path to the output CSV file',
    required=True,
    type=click.Path(
        writable=True,
        file_okay=True,
        dir_okay=False,
        resolve_path=True,
    ),
)
@click.pass_context
def export_csv(ctx: click.core.Context, output_file: str,
               columns: tuple[str, ...]):
    """Export the selected columns of every listing in the data dir to CSV."""
    data_dir = ctx.obj['data_dir']
    click.echo(f'Exporting data to {output_file} using {data_dir=}')
    output_file_path = pathlib.Path(output_file)
    listing_paths = _find_listing_paths(data_dir)
    listings = Listing.get_all_listings([str(path) for path in listing_paths])
    asyncio.run(
        csv_exporter.export_to_csv(listings, output_file_path, list(columns)),
    )


if __name__ == '__main__':
    cli()