From e3abf4b37324f46a822816deb2124ba6c613335d Mon Sep 17 00:00:00 2001
From: Viktor Barzin
Date: Wed, 14 May 2025 21:05:59 +0000
Subject: [PATCH] parameterize detect floorplan step to work with custom data paths

---
 crawler/4_detect_floorplan.py | 8 +++++---
 crawler/main.py | 9 ++++++---
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/crawler/4_detect_floorplan.py b/crawler/4_detect_floorplan.py
index 97dc8d9..525dc55 100644
--- a/crawler/4_detect_floorplan.py
+++ b/crawler/4_detect_floorplan.py
@@ -1,9 +1,10 @@
+import pathlib
 from data_access import Listing
 from tqdm import tqdm
 
 
-def detect_floorplan():
-    listings = Listing.get_all_listings()
+def detect_floorplan(listing_paths: list[str]):
+    listings = Listing.get_all_listings(listing_paths)
 
     for listing in tqdm(listings):
         tqdm.write(str(listing.identifier))
@@ -12,7 +13,8 @@ def detect_floorplan():
 
 
 def main():
-    detect_floorplan()
+    listing_paths = sorted(list(pathlib.Path("data/rs").glob("*/listing.json")))
+    detect_floorplan(listing_paths)
 
 
 if __name__ == "__main__":
diff --git a/crawler/main.py b/crawler/main.py
index 1d1ef62..6fe413c 100644
--- a/crawler/main.py
+++ b/crawler/main.py
@@ -75,9 +75,12 @@ def dump_images(ctx: click.core.Context):
 
 
 @cli.command()
-def detect_floorplan():
-    click.echo('Running detect_floorplan')
-    detect_floorplan_module.detect_floorplan()
+@click.pass_context
+def detect_floorplan(ctx: click.core.Context):
+    data_dir = ctx.obj['data_dir']
+    click.echo(f'Running detect_floorplan in {data_dir}')
+    listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json")))
+    detect_floorplan_module.detect_floorplan(listing_paths)
 
 
 @cli.command()