parameterize data path when fetching listings

This commit is contained in:
Viktor Barzin 2025-05-14 20:19:08 +00:00
parent ea56555884
commit 48d379567b
No known key found for this signature in database
GPG key ID: 4056458DBDBF8863
3 changed files with 28 additions and 11 deletions

View file

@ -1,9 +1,13 @@
import pathlib
from rec.query import listing_query from rec.query import listing_query
from rec.districts import get_districts from rec.districts import get_districts
from data_access import Listing from data_access import Listing
def dump_listings(district_names: set[str] | None = None): def dump_listings(
district_names: set[str] | None = None,
data_dir: pathlib.Path = pathlib.Path("data/rs/")
):
districts = get_districts() if district_names is None else { districts = get_districts() if district_names is None else {
district: locid district: locid
for district, locid in get_districts().items() for district, locid in get_districts().items()
@ -37,7 +41,7 @@ def dump_listings(district_names: set[str] | None = None):
for property in d["properties"]: for property in d["properties"]:
identifier = property["identifier"] identifier = property["identifier"]
listing = Listing(identifier) listing = Listing(identifier, data_dir=data_dir)
listing.dump_listing(property) listing.dump_listing(property)
print() # break line as we used end=, above. print() # break line as we used end=, above.

View file

@ -6,17 +6,15 @@ from rec import floorplan, routing
import re import re
import datetime import datetime
_DATA_DIR = pathlib.Path("data/rs/")
@dataclass() @dataclass()
class Listing: class Listing:
identifier: int identifier: int
_cached: Dict = None _cached: Dict = None
data_dir: pathlib.Path = pathlib.Path("data/rs/")
@staticmethod def get_all_listings(self) -> List["Listing"]:
def get_all_listings() -> List["Listing"]: listing_paths = sorted(list(self.data_dir.glob("*/listing.json")))
listing_paths = sorted(list(_DATA_DIR.glob("*/listing.json")))
identifiers = [] identifiers = []
for listing_path in listing_paths: for listing_path in listing_paths:
with open(listing_path) as f: with open(listing_path) as f:
@ -26,7 +24,7 @@ class Listing:
return identifiers return identifiers
def path_listing(self) -> pathlib.Path: def path_listing(self) -> pathlib.Path:
p = _DATA_DIR / str(self.identifier) p = self.data_dir / str(self.identifier)
p.mkdir(parents=True, exist_ok=True) p.mkdir(parents=True, exist_ok=True)
return p return p

View file

@ -1,3 +1,4 @@
import pathlib
import click import click
import importlib import importlib
@ -31,9 +32,23 @@ def cli():
type=click.Choice(get_districts().keys(), case_sensitive=False), type=click.Choice(get_districts().keys(), case_sensitive=False),
multiple=True, multiple=True,
) )
def dump_listings(district: list[str]): @click.option(
click.echo(f'Running dump_listings for districts {district}') '--data-dir',
dump_listings_module.dump_listings(set(district)) default=pathlib.Path("data/rs/"),
help='Directory where scraped listing data is written',
type=click.Path(
writable=True,
file_okay=False,
dir_okay=True,
resolve_path=True,
),
)
def dump_listings(district: list[str], data_dir: str):
    # CLI entry point: report the selected districts and data directory,
    # then delegate to dump_listings_module.dump_listings.
    # Kept as comments on purpose: click uses a command function's
    # docstring as its --help text, so adding one would change CLI output.
    banner = f'Running dump_listings for districts {district} and data dir {data_dir}'
    click.echo(banner)
    # Convert the option value to a pathlib.Path before handing it on.
    target_dir = pathlib.Path(data_dir)
    # Pass the districts as a set, as the original call does.
    selected = set(district)
    dump_listings_module.dump_listings(selected, target_dir)
@cli.command() @cli.command()