parameterize data path when fetching listings

This commit is contained in:
Viktor Barzin 2025-05-14 20:19:08 +00:00
parent ea56555884
commit 48d379567b
No known key found for this signature in database
GPG key ID: 4056458DBDBF8863
3 changed files with 28 additions and 11 deletions

View file

@ -1,9 +1,13 @@
import pathlib
from rec.query import listing_query
from rec.districts import get_districts
from data_access import Listing
def dump_listings(district_names: set[str] | None = None):
def dump_listings(
district_names: set[str] | None = None,
data_dir: pathlib.Path = pathlib.Path("data/rs/")
):
districts = get_districts() if district_names is None else {
district: locid
for district, locid in get_districts().items()
@ -37,7 +41,7 @@ def dump_listings(district_names: set[str] | None = None):
for property in d["properties"]:
identifier = property["identifier"]
listing = Listing(identifier)
listing = Listing(identifier, data_dir=data_dir)
listing.dump_listing(property)
print() # break line as we used end=, above.

View file

@ -6,17 +6,15 @@ from rec import floorplan, routing
import re
import datetime
_DATA_DIR = pathlib.Path("data/rs/")
@dataclass()
class Listing:
identifier: int
_cached: Dict = None
data_dir: pathlib.Path = pathlib.Path("data/rs/")
@staticmethod
def get_all_listings() -> List["Listing"]:
listing_paths = sorted(list(_DATA_DIR.glob("*/listing.json")))
def get_all_listings(self) -> List["Listing"]:
listing_paths = sorted(list(self.data_dir.glob("*/listing.json")))
identifiers = []
for listing_path in listing_paths:
with open(listing_path) as f:
@ -26,7 +24,7 @@ class Listing:
return identifiers
def path_listing(self) -> pathlib.Path:
p = _DATA_DIR / str(self.identifier)
p = self.data_dir / str(self.identifier)
p.mkdir(parents=True, exist_ok=True)
return p

View file

@ -1,3 +1,4 @@
import pathlib
import click
import importlib
@ -31,9 +32,23 @@ def cli():
type=click.Choice(get_districts().keys(), case_sensitive=False),
multiple=True,
)
def dump_listings(district: list[str]):
click.echo(f'Running dump_listings for districts {district}')
dump_listings_module.dump_listings(set(district))
# --data-dir: directory holding the per-listing data. click.Path is configured
# to accept only directories (no plain files) and to resolve the given path.
@click.option(
    '--data-dir',
    default=pathlib.Path("data/rs/"),
    help='Directory where listing data is stored',
    type=click.Path(
        writable=True,
        file_okay=False,
        dir_okay=True,
        resolve_path=True,
    ),
)
def dump_listings(district: list[str], data_dir: str):
    # CLI entry point: report the chosen districts and data directory, then
    # delegate to the scraper module.
    # NOTE: kept as comments rather than a docstring, because click would use
    # a docstring as the command's --help text.
    click.echo(
        f'Running dump_listings for districts {district} and data dir {data_dir}'
    )
    # The option value is annotated as str; convert it to a pathlib.Path
    # before handing it to the scraper.
    data_dir_path = pathlib.Path(data_dir)
    # `district` is a multi-value option; de-duplicate it into a set.
    dump_listings_module.dump_listings(set(district), data_dir_path)
@cli.command()