Pass an explicit list of listing.json paths into detect_floorplan().

Both entry points now build the list themselves by globbing for
"*/listing.json": the standalone script under "data/rs", the click command
under the data_dir taken from the click context object.

Review notes:
  * listing_paths is annotated as list[pathlib.Path] because both call sites
    pass the result of sorted(Path.glob(...)), which yields Path objects.
  * sorted() accepts any iterable and returns a list, so the intermediate
    list() call around the glob generator is dropped.
  * The "index" lines are carried over unchanged from the original patch; the
    post-image blob hash no longer matches after the edits above.
  * The main.py hunk uses pathlib; presumably it is already imported outside
    the lines shown here - TODO confirm.

diff --git a/crawler/4_detect_floorplan.py b/crawler/4_detect_floorplan.py
index 97dc8d9..525dc55 100644
--- a/crawler/4_detect_floorplan.py
+++ b/crawler/4_detect_floorplan.py
@@ -1,9 +1,10 @@
+import pathlib
 from data_access import Listing
 from tqdm import tqdm
 
 
-def detect_floorplan():
-    listings = Listing.get_all_listings()
+def detect_floorplan(listing_paths: list[pathlib.Path]):
+    listings = Listing.get_all_listings(listing_paths)
 
     for listing in tqdm(listings):
         tqdm.write(str(listing.identifier))
@@ -12,7 +13,8 @@ def detect_floorplan():
 
 
 def main():
-    detect_floorplan()
+    listing_paths = sorted(pathlib.Path("data/rs").glob("*/listing.json"))
+    detect_floorplan(listing_paths)
 
 
 if __name__ == "__main__":
diff --git a/crawler/main.py b/crawler/main.py
index 1d1ef62..6fe413c 100644
--- a/crawler/main.py
+++ b/crawler/main.py
@@ -75,9 +75,12 @@ def dump_images(ctx: click.core.Context):
 
 
 @cli.command()
-def detect_floorplan():
-    click.echo('Running detect_floorplan')
-    detect_floorplan_module.detect_floorplan()
+@click.pass_context
+def detect_floorplan(ctx: click.core.Context):
+    data_dir = ctx.obj['data_dir']
+    click.echo(f'Running detect_floorplan in {data_dir}')
+    listing_paths = sorted(pathlib.Path(data_dir).glob("*/listing.json"))
+    detect_floorplan_module.detect_floorplan(listing_paths)
 
 
 @cli.command()