From 9134145e02083a3c35b0e15ecf50bef98bd9279f Mon Sep 17 00:00:00 2001
From: Viktor Barzin
Date: Sun, 11 May 2025 19:11:23 +0000
Subject: [PATCH] [5/n] click-ify add routing command

run with: poetry run python main.py --step routing
---
# Review notes. This region (after the "---" line, before the first
# "diff --git") is not part of the commit message.
#
# crawler/5_routing.py
# - The former module-level script body is moved into calculate_route(), and
#   main() plus an `if __name__ == "__main__"` guard are added. The loop logic
#   and all message strings are unchanged; only the geodesic(...) call is
#   re-wrapped over three lines. Importing the module (as crawler/main.py now
#   does through importlib) therefore no longer runs the routing step.
# - `createLogger(__name__)` from the project `logger` module is replaced by
#   `logging.getLogger(__name__)` followed by `setLevel(logging.INFO)`.
#   NOTE(review): no handler is attached in this hunk; unless logging is
#   configured somewhere not visible here, the INFO "...-Skip" messages may
#   not be emitted - confirm.
# - geodesic(...) is evaluated for every listing before the `isRemoved` check.
#   This order is inherited from the removed code and is kept as is.
# - `listing.travel_time[0]` is indexed directly after
#   `listing.calculate_route(lat, long, recalculate=False)`; presumably that
#   call populates `travel_time` - verify it cannot be empty.
#
# crawler/main.py
# - Loads '5_routing' with importlib.import_module and maps the step name
#   'routing' to routing_module.calculate_route in steps_to_handlers.
# - NOTE(review): the subject says "click-ify", but no click usage appears in
#   the visible hunks; presumably it lives in the part of main.py outside this
#   hunk - confirm.
 crawler/5_routing.py | 81 +++++++++++++++++++++++++-------------------
 crawler/main.py      |  2 ++
 2 files changed, 49 insertions(+), 34 deletions(-)

diff --git a/crawler/5_routing.py b/crawler/5_routing.py
index a9931c7..81b58df 100644
--- a/crawler/5_routing.py
+++ b/crawler/5_routing.py
@@ -1,43 +1,56 @@
 from data_access import Listing
 from tqdm import tqdm
 from geopy.distance import geodesic
-from logger import createLogger
+import logging
 
 
-log = createLogger(__name__)
-listings = Listing.get_all_listings()
-BROCK_STREET_LAT_LONG = 51.52570434674584, -0.13956495005056113
+def calculate_route():
+    log = logging.getLogger(__name__)
+    log.setLevel(logging.INFO)
 
-# reduce listings to everything within 7 miles
-filtered_listings = []
-for listing in listings:
-    miles = geodesic(BROCK_STREET_LAT_LONG, (listing.latitude, listing.longitude)).miles
-    if listing.isRemoved:
-        log.info(f"Removed-Skip: Skipping {listing.identifier} is already removed.")
-        continue
-    if miles > 7:
-        log.info(
-            f"Miles-Skip: Skipping {listing.identifier} as it is {miles} miles away"
-        )
-        continue
-    if listing.path_routing_json().exists():
-        log.info(
-            f"Path-Skip: Skipping {listing.identifier} as path routing already exists"
-        )
-        continue
-    if listing.sqm_ocr is None or listing.sqm_ocr < 30 or listing.sqm_ocr > 200:
-        log.info(
-            f"Floorplan-Skip: Skipping {listing.identifier} as sqm_ocr is {listing.sqm_ocr}"
-        )
-        continue
-    filtered_listings.append(listing)
+    listings = Listing.get_all_listings()
+    BROCK_STREET_LAT_LONG = 51.52570434674584, -0.13956495005056113
 
-print(f"Filtered listings from {len(listings)} to {len(filtered_listings)}")
+    # reduce listings to everything within 7 miles
+    filtered_listings = []
+    for listing in listings:
+        miles = geodesic(
+            BROCK_STREET_LAT_LONG, (listing.latitude, listing.longitude)
+        ).miles
+        if listing.isRemoved:
+            log.info(f"Removed-Skip: Skipping {listing.identifier} is already removed.")
+            continue
+        if miles > 7:
+            log.info(
+                f"Miles-Skip: Skipping {listing.identifier} as it is {miles} miles away"
+            )
+            continue
+        if listing.path_routing_json().exists():
+            log.info(
+                f"Path-Skip: Skipping {listing.identifier} as path routing already exists"
+            )
+            continue
+        if listing.sqm_ocr is None or listing.sqm_ocr < 30 or listing.sqm_ocr > 200:
+            log.info(
+                f"Floorplan-Skip: Skipping {listing.identifier} as sqm_ocr is {listing.sqm_ocr}"
+            )
+            continue
+        filtered_listings.append(listing)
 
-for listing in tqdm(filtered_listings):
-    lat, long = BROCK_STREET_LAT_LONG
-    listing.calculate_route(lat, long, recalculate=False)
-    traveltime = listing.travel_time[0]
-    duration_minutes = traveltime["duration"] / 60.0
+    print(f"Filtered listings from {len(listings)} to {len(filtered_listings)}")
 
-    tqdm.write(f"{listing.identifier} {duration_minutes}")
+    for listing in tqdm(filtered_listings):
+        lat, long = BROCK_STREET_LAT_LONG
+        listing.calculate_route(lat, long, recalculate=False)
+        traveltime = listing.travel_time[0]
+        duration_minutes = traveltime["duration"] / 60.0
+
+        tqdm.write(f"{listing.identifier} {duration_minutes}")
+
+
+def main():
+    calculate_route()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/crawler/main.py b/crawler/main.py
index f4a287e..4e49dd2 100644
--- a/crawler/main.py
+++ b/crawler/main.py
@@ -5,12 +5,14 @@ dump_listings_module = importlib.import_module('1_dump_listings')
 dump_detail_module = importlib.import_module('2_dump_detail')
 dump_images_module = importlib.import_module('3_dump_images')
 detect_floorplan_module = importlib.import_module('4_detect_floorplan')
+routing_module = importlib.import_module('5_routing')
 
 steps_to_handlers = {
     'dump_listings': dump_listings_module.dump_listings,
     'dump_detail': dump_detail_module.dump_detail,
     'dump_images': dump_images_module.dump_images,
     'detect_floorplan': detect_floorplan_module.detect_floorplan,
+    'routing': routing_module.calculate_route,
 }
 
 