diff --git a/crawler/5_routing.py b/crawler/5_routing.py index 1f827ae..16ee395 100644 --- a/crawler/5_routing.py +++ b/crawler/5_routing.py @@ -1,6 +1,9 @@ from data_access import Listing from tqdm import tqdm from geopy.distance import geodesic +from logger import createLogger + +log = createLogger(__name__) listings = Listing.get_all_listings() BROCK_STREET_LAT_LONG = 51.52570434674584, -0.13956495005056113 @@ -9,8 +12,19 @@ BROCK_STREET_LAT_LONG = 51.52570434674584, -0.13956495005056113 filtered_listings = [] for listing in listings: miles = geodesic(BROCK_STREET_LAT_LONG, (listing.latitude, listing.longitude)).miles - if miles <= 7 and not listing.path_routing_json().exists(): - filtered_listings.append(listing) + if listing.isRemoved: + log.info(f"Removed-Skip: Skipping {listing.identifier} is already removed.") + continue + if miles > 7: + log.info(f"Miles-Skip: Skipping {listing.identifier} as it is {miles} miles away") + continue + if listing.path_routing_json().exists(): + log.info(f"Path-Skip: Skipping {listing.identifier} as path routing already exists") + continue + if listing.sqm_ocr is None or listing.sqm_ocr < 30 or listing.sqm_ocr > 200: + log.info(f"Floorplan-Skip: Skipping {listing.identifier} as sqm_ocr is {listing.sqm_ocr}") + continue + filtered_listings.append(listing) print(f"Filtered listings from {len(listings)} to {len(filtered_listings)}")