From ea56555884fb3bfbfc95dd1dc5f05d53c3210fb2 Mon Sep 17 00:00:00 2001
From: Viktor Barzin
Date: Wed, 14 May 2025 19:41:13 +0000
Subject: [PATCH] refactor main.py click to use click commands to allow passing
 parameters to commands and enable fetching districts by district name

---
Review notes (commentary placed after the "---" marker, next to the diffstat):

 * crawler/main.py: --district is declared with multiple=True, so running the
   dump_listings command without any --district hands the callback an empty
   collection, not None. dump_listings() in 1_dump_listings.py only falls back
   to every district when it receives None; an empty set made its filter drop
   every district and nothing was scraped. The callback now passes
   `set(district) or None`.
 * The fix rewrites one already-added line in place, so the hunk sizes and the
   diffstat below are unchanged. The blob hashes on the "index" lines are kept
   as originally recorded.

 crawler/1_dump_listings.py |  9 +++++--
 crawler/main.py            | 51 ++++++++++++++++++++++++++++++--------
 crawler/rec/districts.py   | 51 +++++++++++++++++++-------------------
 3 files changed, 73 insertions(+), 38 deletions(-)

diff --git a/crawler/1_dump_listings.py b/crawler/1_dump_listings.py
index 93f7c94..8ba92d1 100644
--- a/crawler/1_dump_listings.py
+++ b/crawler/1_dump_listings.py
@@ -3,8 +3,13 @@ from rec.districts import get_districts
 from data_access import Listing
 
 
-def dump_listings():
-    districts = get_districts()
+def dump_listings(district_names: set[str] | None = None):
+    districts = get_districts() if district_names is None else {
+        district: locid
+        for district, locid in get_districts().items()
+        if district in district_names
+    }
+    print("Valid districts to scrape:", districts.keys())
     for district, locid in districts.items():
         print("#### District:", district)
         for i in [1, 2]:
diff --git a/crawler/main.py b/crawler/main.py
index 061c14b..e9a60a2 100644
--- a/crawler/main.py
+++ b/crawler/main.py
@@ -1,6 +1,8 @@
 import click
 import importlib
 
+from rec.districts import get_districts
+
 dump_listings_module = importlib.import_module('1_dump_listings')
 dump_detail_module = importlib.import_module('2_dump_detail')
 dump_images_module = importlib.import_module('3_dump_images')
@@ -16,20 +18,47 @@ steps_to_handlers = {
 }
 
 
-@click.command()
+@click.group()
+def cli():
+    pass
+
+
+@cli.command()
 @click.option(
-    '--step',
-    default=[],
-    help='Scraping step to run',
+    '--district',
+    default=None,
+    help='Districts to scrape',
+    type=click.Choice(get_districts().keys(), case_sensitive=False),
     multiple=True,
-    type=click.Choice(steps_to_handlers.keys())
 )
-def main(step: list[str]):
-    click.echo(f'Running steps: {step}')
-    for s in step:
-        click.echo(f'Calling handler for step: {s}')
-        steps_to_handlers[s]()
+def dump_listings(district: list[str]):
+    click.echo(f'Running dump_listings for districts {district}')
+    dump_listings_module.dump_listings(set(district) or None)  # no --district: scrape all
+
+
+@cli.command()
+def dump_detail():
+    click.echo('Running dump_detail')
+    dump_detail_module.dump_detail()
+
+
+@cli.command()
+def dump_images():
+    click.echo('Running dump_images')
+    dump_images_module.dump_images()
+
+
+@cli.command()
+def detect_floorplan():
+    click.echo('Running detect_floorplan')
+    detect_floorplan_module.detect_floorplan()
+
+
+@cli.command()
+def routing():
+    click.echo('Running routing')
+    routing_module.calculate_route()
 
 
 if __name__ == '__main__':
-    main()
+    cli()
diff --git a/crawler/rec/districts.py b/crawler/rec/districts.py
index 823199c..04f660c 100644
--- a/crawler/rec/districts.py
+++ b/crawler/rec/districts.py
@@ -1,36 +1,37 @@
 def get_districts():
     return {
-        # "Barking and Dagenham": "REGION^61400",
-        # "Barnet": "REGION^93929",
-        # "Bexley": "REGION^93932",
-        # "Brent": "REGION^93935",
-        # "Bromley": "REGION^93938",
+        "Barking and Dagenham": "REGION^61400",
+        "Barnet": "REGION^93929",
+        "Bexley": "REGION^93932",
+        "Brent": "REGION^93935",
+        "Bromley": "REGION^93938",
         "Camden": "REGION^93941",
         "City of London": "REGION^61224",
-        # "Croydon": "REGION^93944",
-        # "Ealing": "REGION^93947",
-        # "Enfield": "REGION^93950",
-        # "Greenwich": "REGION^61226",
+        "Croydon": "REGION^93944",
+        "Ealing": "REGION^93947",
+        "Enfield": "REGION^93950",
+        "Greenwich": "REGION^61226",
         "Hackney": "REGION^93953",
         "Hammersmith and Fulham": "REGION^61407",
-        # "Haringey": "REGION^61227",
-        # "Harrow": "REGION^93956",
-        # "Havering": "REGION^61228",
-        # "Hillingdon": "REGION^93959",
-        # "Hounslow": "REGION^93962",
+        "Haringey": "REGION^61227",
+        "Harrow": "REGION^93956",
+        "Havering": "REGION^61228",
+        "Hillingdon": "REGION^93959",
+        "Hounslow": "REGION^93962",
         "Islington": "REGION^93965",
+        "London": "REGION^87490",
         "Kensington and Chelsea": "REGION^61229",
-        # "Kingston upon Thames": "REGION^93968",
-        # "Lambeth": "REGION^93971",
-        # "Lewisham": "REGION^61413",
-        # "Merton": "REGION^61414",
-        # "Newham": "REGION^61231",
-        # "Redbridge": "REGION^61537",
-        # "Richmond upon Thames": "REGION^61415",
-        # "Southwark": "REGION^61518",
-        # "Sutton": "REGION^93974",
+        "Kingston upon Thames": "REGION^93968",
+        "Lambeth": "REGION^93971",
+        "Lewisham": "REGION^61413",
+        "Merton": "REGION^61414",
+        "Newham": "REGION^61231",
+        "Redbridge": "REGION^61537",
+        "Richmond upon Thames": "REGION^61415",
+        "Southwark": "REGION^61518",
+        "Sutton": "REGION^93974",
         "Tower Hamlets": "REGION^61417",
-        # "Waltham Forest": "REGION^61232",
-        # "Wandsworth": "REGION^93977",
+        "Waltham Forest": "REGION^61232",
+        "Wandsworth": "REGION^93977",
         "Westminster": "REGION^93980",
     }