[1/n] click-ify: add a click entrypoint script (main.py) and expose 1_dump_listings as the dump_listings step.
Run via: poetry run python main.py --step dump_listings
This commit is contained in:
parent
0a66efa48a
commit
90b531f5d9
4 changed files with 272 additions and 64 deletions
|
|
@ -1,38 +1,45 @@
|
||||||
from rec.query import listing_query
|
from rec.query import listing_query
|
||||||
from rec.districts import get_districts
|
from rec.districts import get_districts
|
||||||
import pathlib
|
|
||||||
from data_access import Listing
|
from data_access import Listing
|
||||||
|
|
||||||
folder = pathlib.Path("data/rs/")
|
|
||||||
districts = get_districts()
|
|
||||||
|
|
||||||
for district, locid in districts.items():
|
def dump_listings():
|
||||||
print("#### District:", district)
|
districts = get_districts()
|
||||||
for i in [1, 2]:
|
for district, locid in districts.items():
|
||||||
try:
|
print("#### District:", district)
|
||||||
d = listing_query(
|
for i in [1, 2]:
|
||||||
page=i,
|
try:
|
||||||
min_bedrooms=1,
|
d = listing_query(
|
||||||
max_bedrooms=4,
|
page=i,
|
||||||
radius=0,
|
min_bedrooms=1,
|
||||||
min_price=0,
|
max_bedrooms=4,
|
||||||
max_price=1000000,
|
radius=0,
|
||||||
location_id=locid,
|
min_price=0,
|
||||||
page_size=500,
|
max_price=1000000,
|
||||||
max_days_since_added=None,
|
location_id=locid,
|
||||||
)
|
page_size=500,
|
||||||
except Exception as e:
|
max_days_since_added=None,
|
||||||
print(e)
|
)
|
||||||
break
|
except Exception as e:
|
||||||
if i == 1:
|
print(e)
|
||||||
print("totalAvailableResults: ", d["totalAvailableResults"])
|
break
|
||||||
if len(d["properties"]) == 0:
|
if i == 1:
|
||||||
break
|
print("totalAvailableResults: ", d["totalAvailableResults"])
|
||||||
print(f"page {i}", end=", ", flush=True)
|
if len(d["properties"]) == 0:
|
||||||
|
break
|
||||||
|
print(f"page {i}", end=", ", flush=True)
|
||||||
|
|
||||||
for property in d["properties"]:
|
for property in d["properties"]:
|
||||||
identifier = property["identifier"]
|
identifier = property["identifier"]
|
||||||
|
|
||||||
listing = Listing(identifier)
|
listing = Listing(identifier)
|
||||||
listing.dump_listing(property)
|
listing.dump_listing(property)
|
||||||
print() # break line as we used end=, above.
|
print() # break line as we used end=, above.
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
dump_listings()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
|
|
||||||
26
crawler/main.py
Normal file
26
crawler/main.py
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
import click
|
||||||
|
import importlib
|
||||||
|
|
||||||
|
dump_listings_module = importlib.import_module('1_dump_listings')
|
||||||
|
|
||||||
|
steps_to_handlers = {
|
||||||
|
'dump_listings': dump_listings_module.dump_listings,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@click.command()
|
||||||
|
@click.option(
|
||||||
|
'--step',
|
||||||
|
default=[],
|
||||||
|
help='Scraping step to run',
|
||||||
|
multiple=True,
|
||||||
|
type=click.Choice(steps_to_handlers.keys())
|
||||||
|
)
|
||||||
|
def main(step: list[str]):
|
||||||
|
for s in step:
|
||||||
|
click.echo(f'Calling handler for step: {s}')
|
||||||
|
steps_to_handlers[s]()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
||||||
238
crawler/poetry.lock
generated
238
crawler/poetry.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -21,8 +21,7 @@ pandas = "^2.2.1"
|
||||||
geopy = "^2.4.1"
|
geopy = "^2.4.1"
|
||||||
matplotlib = "^3.10.0"
|
matplotlib = "^3.10.0"
|
||||||
opencv-python = "^4.11.0.86"
|
opencv-python = "^4.11.0.86"
|
||||||
|
click = "^8.2.0"
|
||||||
[tool.poetry.group.dev.dependencies]
|
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = ["poetry-core>=1.0.0"]
|
requires = ["poetry-core>=1.0.0"]
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue