diff --git a/crawler/.env.sample b/crawler/.env.sample index ac3a171..41c5d7f 100644 --- a/crawler/.env.sample +++ b/crawler/.env.sample @@ -1,3 +1,3 @@ # Copy me to .env and source me -export ROUTING_API_KEY="" # fetch from https://console.cloud.google.com/google/maps-apis/ +export ROUTING_API_KEY="" # fetch from https://console.cloud.google.com/google/maps-apis/; prices - https://developers.google.com/maps/billing-and-pricing/pricing diff --git a/crawler/5_routing.py b/crawler/5_routing.py index 815b6f4..f9b9404 100644 --- a/crawler/5_routing.py +++ b/crawler/5_routing.py @@ -1,38 +1,31 @@ from data_access import Listing from tqdm import tqdm -from geopy.distance import geodesic -import logging +from rec import routing -def calculate_route(listing_paths: list[str]): - log = logging.getLogger(__name__) - log.setLevel(logging.INFO) +def calculate_route( + listing_paths: list[str], + destination_address: str, + travel_mode: routing.TravelMode, +): listings = Listing.get_all_listings(listing_paths) - BROCK_STREET_LAT_LONG = 51.52570434674584, -0.13956495005056113 # reduce listings to everything within 7 miles filtered_listings = [] for listing in listings: - miles = geodesic(BROCK_STREET_LAT_LONG, - (listing.latitude, listing.longitude)).miles if listing.isRemoved: - log.info(f"Removed-Skip: Skipping {listing.identifier} " - "is already removed.") - continue - if miles > 7: - log.info(f"Miles-Skip: Skipping {listing.identifier} as it is " - f"{miles} miles away") + print(f"Removed-Skip: Skipping {listing.identifier} " + "is already removed.") continue if listing.path_routing_json().exists(): - log.info( - (f"Path-Skip: Skipping {listing.identifier} as path routing " - "already exists")) + print(f"Path-Skip: Skipping {listing.identifier} as path routing " + "already exists") continue if (listing.sqm_ocr is None or listing.sqm_ocr < 30 or listing.sqm_ocr > 200): - log.info((f"Floorplan-Skip: Skipping {listing.identifier} as " - f"sqm_ocr is {listing.sqm_ocr}")) + print((f"Floorplan-Skip: Skipping {listing.identifier} as " + f"sqm_ocr is {listing.sqm_ocr}")) continue filtered_listings.append(listing) @@ -40,8 +33,11 @@ def calculate_route(listing_paths: list[str]): f"Filtered listings from {len(listings)} to {len(filtered_listings)}") for listing in tqdm(filtered_listings): - lat, long = BROCK_STREET_LAT_LONG - listing.calculate_route(lat, long, recalculate=False) + listing.calculate_route( + destination_address, + travel_mode, + recalculate=False, + ) traveltime = listing.travel_time[0] duration_minutes = traveltime["duration"] / 60.0 diff --git a/crawler/data_access.py b/crawler/data_access.py index c8f6551..7afe0c9 100644 --- a/crawler/data_access.py +++ b/crawler/data_access.py @@ -166,14 +166,18 @@ class Listing: return max_sqm def calculate_route(self, - dest_lat: float, - dest_lon: float, + dest_address: str, + travel_mode: routing.TravelMode, recalculate=False): if self.path_routing_json().exists() and not recalculate: return - result = routing.transit_route(self.latitude, self.longitude, dest_lat, - dest_lon) + result = routing.transit_route( + self.latitude, + self.longitude, + dest_address, + travel_mode, + ) with open(self.path_routing_json(), "w") as f: json.dump(result, f) diff --git a/crawler/main.py b/crawler/main.py index f6136b6..165a190 100644 --- a/crawler/main.py +++ b/crawler/main.py @@ -1,4 +1,5 @@ import asyncio +import os import pathlib import click import importlib @@ -7,6 +8,7 @@ from rec.districts import get_districts from data_access import Listing import csv_exporter from rec.query import ListingType, FurnishType +from rec.routing import API_KEY_ENVIRONMENT_VARIABLE, TravelMode dump_listings_module = importlib.import_module('1_dump_listings') dump_detail_module = importlib.import_module('2_dump_detail') @@ -148,12 +150,46 @@ def detect_floorplan(ctx: click.core.Context): @cli.command() +@click.option( + '--destination-address', + '-d', + help='Destination address for routing', + required=True, + type=click.STRING, +) +@click.option( + '--travel-mode', + '-m', + help='Travel mode for routing', + type=click.Choice( + TravelMode.__members__.keys(), + case_sensitive=False, + ), + required=True, +) +@click.option( + '--limit', + '-l', + help='Limit the number of listings to process', + type=click.IntRange(min=1), + default=1, # by default limit to 1 to avoid accidental API usage +) @click.pass_context -def routing(ctx: click.core.Context): +def routing(ctx: click.core.Context, destination_address: str, + travel_mode: str, limit: int): data_dir = ctx.obj['data_dir'] - click.echo(f'Running routing for listings in {data_dir}') + click.echo(f'Running routing for the first {limit} listings in {data_dir}') listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json"))) - routing_module.calculate_route(listing_paths) + listing_paths = listing_paths[:limit] + if os.environ.get(API_KEY_ENVIRONMENT_VARIABLE) is None: + raise click.exceptions.MissingParameter( + f'{API_KEY_ENVIRONMENT_VARIABLE} environment variable is not set. ' + 'Please set it to your API key for the routing service.') + routing_module.calculate_route( + listing_paths, + destination_address, + TravelMode[travel_mode], + ) @cli.command() diff --git a/crawler/rec/routing.py b/crawler/rec/routing.py index 413c26c..ca8e117 100644 --- a/crawler/rec/routing.py +++ b/crawler/rec/routing.py @@ -1,50 +1,60 @@ +import enum +import os +from typing import Any import requests from rec.utils import nextMonday from collections import defaultdict -API_KEY = "AIzaSyBoBHzeQFgR7O-NlNsuHXQcC1B7ccEHpl8" url = "https://routes.googleapis.com/directions/v2:computeRoutes" +API_KEY_ENVIRONMENT_VARIABLE = "ROUTING_API_KEY" + + +class TravelMode(enum.StrEnum): + TRANSIT = "TRANSIT" + BICYCLE = "BICYCLE" + WALK = "WALK" + DRIVE = "DRIVE" def transit_route( origin_lat: float, origin_lon: float, - dest_lat: float, - dest_lon: float, + dest_address: str, + travel_mode: TravelMode, compute_alternative_routes=True, -): +) -> dict[str, Any]: monday9am = nextMonday() + # must be set + api_key = os.environ[API_KEY_ENVIRONMENT_VARIABLE] + header = { - "X-Goog-Api-Key": - API_KEY, - "Content-Type": - "application/json", - "X-Goog-FieldMask": - "routes.distanceMeters,routes.duration,routes.staticDuration,routes.legs.steps.distanceMeters,routes.legs.steps.staticDuration,routes.legs.steps.travelMode", + "X-Goog-Api-Key": api_key, + "Content-Type": "application/json", + "X-Goog-FieldMask": # "routes.*", + "routes.distanceMeters,routes.duration,routes.staticDuration,routes.legs.steps.distanceMeters,routes.legs.steps.staticDuration,routes.legs.steps.travelMode", } body = { - "origin": - { - "location": - { - "latLng": { - "latitude": origin_lat, - "longitude": origin_lon - } - } - }, - "destination": - { - "location": { - "latLng": { - "latitude": dest_lat, - "longitude": dest_lon - } + "origin": { + # "address": origin_address + "location": { + "latLng": { + "latitude": origin_lat, + "longitude": origin_lon } - }, - "travelMode": "TRANSIT", + } + }, + "destination": { + "address": dest_address + # "location": { + # "latLng": { + # "latitude": dest_lat, + # "longitude": dest_lon + # } + # } + }, + "travelMode": travel_mode.value, # "2023-10-15T15:01:23.045123456Z" "departureTime": monday9am.strftime("%Y-%m-%dT%H:%M:%S.%fZ"), "computeAlternativeRoutes": compute_alternative_routes, @@ -84,39 +94,20 @@ def extract_time(d, limit: int = 2): else: used_transit = True duration_per_transit[step["travelMode"]] += int( - step["staticDuration"].strip("s") - ) - distance_per_transit[step["travelMode"]] += step.get("distanceMeters", 0) + step["staticDuration"].strip("s")) + distance_per_transit[step["travelMode"]] += step.get( + "distanceMeters", 0) if step["travelMode"] == "TRANSIT": number_of_transit_stops += 1 - res.append( - { - "duration": duration, - "distance": distance, - "duration_static": duration_static, - "initial_walk_duration": initial_walk_duration, - "duration_per_transit": dict(duration_per_transit), - "distance_per_transit": dict(distance_per_transit), - "number_of_transit_stops": number_of_transit_stops, - } - ) + res.append({ + "duration": duration, + "distance": distance, + "duration_static": duration_static, + "initial_walk_duration": initial_walk_duration, + "duration_per_transit": dict(duration_per_transit), + "distance_per_transit": dict(distance_per_transit), + "number_of_transit_stops": number_of_transit_stops, + }) return res[:limit] - - -if __name__ == "__main__": - import json - - with open("code/json/routing_routeapi.json", "r") as f: - d = json.load(f) - - extract_time(d) - -# if __name__ == "__main__": -# origin = 51.5635664310333, -0.1107173751570373 # home -# dest = 51.50475678313417, 0.04915321000190009 # london city airport -# d = travel_time(origin[0], origin[1], dest[0], dest[1]) -# import json -# with open('code/json/routing_routeapi.json', 'w') as f: -# json.dump(d, f)