[1/n] click-ify: add a click entrypoint script and the 1_dump_listings command

run via:
poetry run python main.py --step dump_listings
This commit is contained in:
Viktor Barzin 2025-05-11 18:59:41 +00:00
parent 0a66efa48a
commit 90b531f5d9
No known key found for this signature in database
GPG key ID: 4056458DBDBF8863
4 changed files with 272 additions and 64 deletions

View file

@ -1,38 +1,45 @@
from rec.query import listing_query
from rec.districts import get_districts
import pathlib
from data_access import Listing
# Relative output directory; not referenced by any other line visible in this view.
folder = pathlib.Path("data/rs/")
# NOTE(review): the statements below run at module level and duplicate the
# start of dump_listings() further down; presumably this is the pre-refactor
# copy (the removed side of the diff hunk) -- confirm before relying on it.
# Indentation was lost in this view, so the nesting is inferred from syntax.
districts = get_districts()
# Each item supplies the label printed below and the location id for the query.
for district, locid in districts.items():
print("#### District:", district)
# Only result pages 1 and 2 are requested.
for i in [1, 2]:
try:
d = listing_query(
page=i,
min_bedrooms=1,
max_bedrooms=4,
radius=0,
min_price=0,
max_price=1000000,
location_id=locid,
page_size=500,
max_days_since_added=None,
)
except Exception as e:
# Any failure is printed and the enclosing loop (presumably the page loop) is left.
print(e)
break
# The total is reported only for the first page.
if i == 1:
print("totalAvailableResults: ", d["totalAvailableResults"])
# A page without properties ends the paging.
if len(d["properties"]) == 0:
break
# end=", " keeps successive page markers on one output line.
print(f"page {i}", end=", ", flush=True)
def dump_listings():
    """Query listings for every district and dump each returned property.

    For each ``(district, location id)`` pair from ``get_districts()`` this
    requests result pages 1 and 2 from ``listing_query`` (``page_size=500``)
    and passes every returned property to
    ``Listing(identifier).dump_listing(...)``. Progress is printed to stdout.

    Paging for a district stops early when a query raises (the exception is
    printed and the crawl continues with the next district) or when a page
    contains no properties.
    """
    districts = get_districts()
    for district, locid in districts.items():
        print("#### District:", district)
        for page in (1, 2):
            try:
                result = listing_query(
                    page=page,
                    min_bedrooms=1,
                    max_bedrooms=4,
                    radius=0,
                    min_price=0,
                    max_price=1000000,
                    location_id=locid,
                    page_size=500,
                    max_days_since_added=None,
                )
            except Exception as e:
                # Deliberate best effort: report the failure and give up on
                # this district's remaining pages instead of aborting the run.
                print(e)
                break
            if page == 1:
                print("totalAvailableResults: ", result["totalAvailableResults"])
            if not result["properties"]:
                break
            print(f"page {page}", end=", ", flush=True)

            # ``listing_data`` rather than ``property`` so the builtin is not
            # shadowed.
            for listing_data in result["properties"]:
                listing = Listing(listing_data["identifier"])
                listing.dump_listing(listing_data)
        # NOTE(review): placed after the page loop so the "page N, " markers
        # of one district share a line -- inferred from the comment below, as
        # the original indentation is not visible; confirm.
        print()  # break line as we used end=, above.
def main():
    """Script entry point: delegate to ``dump_listings()``."""
    dump_listings()
# Run the dump only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()

26
crawler/main.py Normal file
View file

@ -0,0 +1,26 @@
import click
import importlib
# '1_dump_listings' begins with a digit, so it cannot be named in a regular
# ``import`` statement; resolve it by its string name instead.
dump_listings_module = importlib.import_module('1_dump_listings')

# Registry of the values accepted by ``--step`` and the callable each one runs.
steps_to_handlers = {'dump_listings': dump_listings_module.dump_listings}
@click.command()
@click.option(
    '--step',
    default=[],
    help='Scraping step to run',
    multiple=True,
    # Hand click a concrete sequence of names rather than a live dict view.
    type=click.Choice(list(steps_to_handlers)),
)
def main(step: tuple[str, ...]) -> None:
    """Run each requested scraping step.

    Args:
        step: Values collected from every ``--step`` occurrence on the
            command line. Because the option is declared with
            ``multiple=True``, click passes them as a tuple; when the option
            is omitted the collection is empty and nothing is run.
    """
    for name in step:
        click.echo(f'Calling handler for step: {name}')
        steps_to_handlers[name]()
# Dispatch the click command only when executed as a script, not on import.
if __name__ == '__main__':
    main()

238
crawler/poetry.lock generated

File diff suppressed because it is too large Load diff

View file

@ -21,8 +21,7 @@ pandas = "^2.2.1"
geopy = "^2.4.1"
matplotlib = "^3.10.0"
opencv-python = "^4.11.0.86"
[tool.poetry.group.dev.dependencies]
click = "^8.2.0"
[build-system]
requires = ["poetry-core>=1.0.0"]