From 48f694e002a6a71f63ebe8dc68f1c9490b108fab Mon Sep 17 00:00:00 2001
From: Viktor Barzin
Date: Sun, 11 May 2025 19:06:08 +0000
Subject: [PATCH] [4/n] click-ify add detect floorplan command

run with poetry run python main.py --step detect_floorplan
---
 crawler/4_detect_floorplan.py | 20 +++++++++++++++-----
 crawler/main.py               |  2 ++
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/crawler/4_detect_floorplan.py b/crawler/4_detect_floorplan.py
index 388f2d4..97dc8d9 100644
--- a/crawler/4_detect_floorplan.py
+++ b/crawler/4_detect_floorplan.py
@@ -1,9 +1,19 @@
 from data_access import Listing
 from tqdm import tqdm
 
-listings = Listing.get_all_listings()
 
-for listing in tqdm(listings):
-    tqdm.write(str(listing.identifier))
-    # listing.calculate_sqm_model() # using google/deplot model. Too slow, rather use tesseract
-    listing.calculate_sqm_ocr(recalculate=False)
+def detect_floorplan():
+    listings = Listing.get_all_listings()
+
+    for listing in tqdm(listings):
+        tqdm.write(str(listing.identifier))
+        # listing.calculate_sqm_model() # using google/deplot model. Too slow, rather use tesseract
+        listing.calculate_sqm_ocr(recalculate=False)
+
+
+def main():
+    detect_floorplan()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/crawler/main.py b/crawler/main.py
index 530a58f..f4a287e 100644
--- a/crawler/main.py
+++ b/crawler/main.py
@@ -4,11 +4,13 @@ import importlib
 dump_listings_module = importlib.import_module('1_dump_listings')
 dump_detail_module = importlib.import_module('2_dump_detail')
 dump_images_module = importlib.import_module('3_dump_images')
+detect_floorplan_module = importlib.import_module('4_detect_floorplan')
 
 steps_to_handlers = {
     'dump_listings': dump_listings_module.dump_listings,
     'dump_detail': dump_detail_module.dump_detail,
     'dump_images': dump_images_module.dump_images,
+    'detect_floorplan': detect_floorplan_module.detect_floorplan,
 }
 
 