[1/n] click-ify - add an entrypoint for the click script and add the
1_dump_listings command; run via: poetry run python main.py --step dump_listings
This commit is contained in:
parent
0a66efa48a
commit
90b531f5d9
4 changed files with 272 additions and 64 deletions
|
|
@ -1,38 +1,45 @@
|
|||
from rec.query import listing_query
|
||||
from rec.districts import get_districts
|
||||
import pathlib
|
||||
from data_access import Listing
|
||||
|
||||
folder = pathlib.Path("data/rs/")
|
||||
districts = get_districts()
|
||||
|
||||
for district, locid in districts.items():
|
||||
print("#### District:", district)
|
||||
for i in [1, 2]:
|
||||
try:
|
||||
d = listing_query(
|
||||
page=i,
|
||||
min_bedrooms=1,
|
||||
max_bedrooms=4,
|
||||
radius=0,
|
||||
min_price=0,
|
||||
max_price=1000000,
|
||||
location_id=locid,
|
||||
page_size=500,
|
||||
max_days_since_added=None,
|
||||
)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
break
|
||||
if i == 1:
|
||||
print("totalAvailableResults: ", d["totalAvailableResults"])
|
||||
if len(d["properties"]) == 0:
|
||||
break
|
||||
print(f"page {i}", end=", ", flush=True)
|
||||
def dump_listings():
|
||||
districts = get_districts()
|
||||
for district, locid in districts.items():
|
||||
print("#### District:", district)
|
||||
for i in [1, 2]:
|
||||
try:
|
||||
d = listing_query(
|
||||
page=i,
|
||||
min_bedrooms=1,
|
||||
max_bedrooms=4,
|
||||
radius=0,
|
||||
min_price=0,
|
||||
max_price=1000000,
|
||||
location_id=locid,
|
||||
page_size=500,
|
||||
max_days_since_added=None,
|
||||
)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
break
|
||||
if i == 1:
|
||||
print("totalAvailableResults: ", d["totalAvailableResults"])
|
||||
if len(d["properties"]) == 0:
|
||||
break
|
||||
print(f"page {i}", end=", ", flush=True)
|
||||
|
||||
for property in d["properties"]:
|
||||
identifier = property["identifier"]
|
||||
for property in d["properties"]:
|
||||
identifier = property["identifier"]
|
||||
|
||||
listing = Listing(identifier)
|
||||
listing.dump_listing(property)
|
||||
print() # break line as we used end=, above.
|
||||
listing = Listing(identifier)
|
||||
listing.dump_listing(property)
|
||||
print() # break line as we used end=, above.
|
||||
|
||||
|
||||
def main():
|
||||
dump_listings()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
|||
26
crawler/main.py
Normal file
26
crawler/main.py
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
import click
|
||||
import importlib
|
||||
|
||||
# The module's name starts with a digit, which is not a valid Python
# identifier, so it cannot be named in an ``import`` statement; it is loaded
# by its string name instead.
dump_listings_module = importlib.import_module('1_dump_listings')

# Registry of step names mapped to the callable that runs each step. The keys
# double as the set of values the --step option accepts.
steps_to_handlers = {
    'dump_listings': dump_listings_module.dump_listings,
}
|
||||
|
||||
|
||||
@click.command()
@click.option(
    '--step',
    # click hands a ``multiple=True`` option to the callback as a tuple, so an
    # immutable empty tuple is the matching "nothing selected" default.
    default=(),
    help='Scraping step to run',
    multiple=True,
    # Materialise the keys: Choice is documented as taking a sequence of
    # strings rather than a live dict view.
    type=click.Choice(list(steps_to_handlers)),
)
def main(step: tuple[str, ...]):
    """Run the requested scraping steps in command-line order."""
    # NOTE: click shows the docstring above as the command's --help text, so
    # parameter details are kept in this comment instead. ``step`` holds every
    # value passed via --step (the option may be repeated); click has already
    # validated each one against the keys of ``steps_to_handlers``.
    if not step:
        # Without this hint an invocation with no --step exits silently.
        click.echo(
            'No --step given; nothing to run. '
            f'Available steps: {", ".join(steps_to_handlers)}',
            err=True,
        )
        return

    for s in step:
        click.echo(f'Calling handler for step: {s}')
        steps_to_handlers[s]()


if __name__ == '__main__':
    main()
|
||||
238
crawler/poetry.lock
generated
238
crawler/poetry.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -21,8 +21,7 @@ pandas = "^2.2.1"
|
|||
geopy = "^2.4.1"
|
||||
matplotlib = "^3.10.0"
|
||||
opencv-python = "^4.11.0.86"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
click = "^8.2.0"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core>=1.0.0"]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue