parameterize data path when fetching listings
This commit is contained in:
parent
ea56555884
commit
48d379567b
3 changed files with 28 additions and 11 deletions
|
|
@ -1,9 +1,13 @@
|
|||
import pathlib
|
||||
from rec.query import listing_query
|
||||
from rec.districts import get_districts
|
||||
from data_access import Listing
|
||||
|
||||
|
||||
def dump_listings(district_names: set[str] | None = None):
|
||||
def dump_listings(
|
||||
district_names: set[str] | None = None,
|
||||
data_dir: pathlib.Path = pathlib.Path("data/rs/")
|
||||
):
|
||||
districts = get_districts() if district_names is None else {
|
||||
district: locid
|
||||
for district, locid in get_districts().items()
|
||||
|
|
@ -37,7 +41,7 @@ def dump_listings(district_names: set[str] | None = None):
|
|||
for property in d["properties"]:
|
||||
identifier = property["identifier"]
|
||||
|
||||
listing = Listing(identifier)
|
||||
listing = Listing(identifier, data_dir=data_dir)
|
||||
listing.dump_listing(property)
|
||||
print() # break line as we used end=, above.
|
||||
|
||||
|
|
|
|||
|
|
@ -6,17 +6,15 @@ from rec import floorplan, routing
|
|||
import re
|
||||
import datetime
|
||||
|
||||
_DATA_DIR = pathlib.Path("data/rs/")
|
||||
|
||||
|
||||
@dataclass()
|
||||
class Listing:
|
||||
identifier: int
|
||||
_cached: Dict = None
|
||||
data_dir: pathlib.Path = pathlib.Path("data/rs/")
|
||||
|
||||
@staticmethod
|
||||
def get_all_listings() -> List["Listing"]:
|
||||
listing_paths = sorted(list(_DATA_DIR.glob("*/listing.json")))
|
||||
def get_all_listings(self) -> List["Listing"]:
|
||||
listing_paths = sorted(list(self.data_dir.glob("*/listing.json")))
|
||||
identifiers = []
|
||||
for listing_path in listing_paths:
|
||||
with open(listing_path) as f:
|
||||
|
|
@ -26,7 +24,7 @@ class Listing:
|
|||
return identifiers
|
||||
|
||||
def path_listing(self) -> pathlib.Path:
|
||||
p = _DATA_DIR / str(self.identifier)
|
||||
p = self.data_dir / str(self.identifier)
|
||||
p.mkdir(parents=True, exist_ok=True)
|
||||
return p
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import pathlib
|
||||
import click
|
||||
import importlib
|
||||
|
||||
|
|
@ -31,9 +32,23 @@ def cli():
|
|||
type=click.Choice(get_districts().keys(), case_sensitive=False),
|
||||
multiple=True,
|
||||
)
|
||||
def dump_listings(district: list[str]):
|
||||
click.echo(f'Running dump_listings for districts {district}')
|
||||
dump_listings_module.dump_listings(set(district))
|
||||
@click.option(
|
||||
'--data-dir',
|
||||
default=pathlib.Path("data/rs/"),
|
||||
help='Directory where listing data is stored',
|
||||
type=click.Path(
|
||||
writable=True,
|
||||
file_okay=False,
|
||||
dir_okay=True,
|
||||
resolve_path=True,
|
||||
),
|
||||
)
|
||||
def dump_listings(district: list[str], data_dir: str):
|
||||
click.echo(
|
||||
f'Running dump_listings for districts {district} and data dir {data_dir}'
|
||||
)
|
||||
data_dir_path = pathlib.Path(data_dir)
|
||||
dump_listings_module.dump_listings(set(district), data_dir_path)
|
||||
|
||||
|
||||
@cli.command()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue