Parameterize dump_detail to use a custom data dir, and move the data-dir option into the click context

This commit is contained in:
Viktor Barzin 2025-05-14 20:32:37 +00:00
parent 48d379567b
commit e424361ed9
No known key found for this signature in database
GPG key ID: 4056458DBDBF8863
3 changed files with 32 additions and 21 deletions

View file

@ -1,13 +1,14 @@
import json
import pathlib
from rec.query import detail_query
from tqdm import tqdm
from data_access import Listing
def dump_detail():
def dump_detail(listing_paths: list[str]):
incremental = True
listings = Listing.get_all_listings()
listings = Listing.get_all_listings(listing_paths)
filtered_listings = []
for listing in listings:
# We introduced last_seen later, so not all entries have it.
@ -32,7 +33,8 @@ def dump_detail():
def main():
dump_detail()
listing_paths = sorted(list(pathlib.Path("data/rs").glob("*/listing.json")))
dump_detail(listing_paths)
if __name__ == "__main__":

View file

@ -13,8 +13,8 @@ class Listing:
_cached: Dict = None
data_dir: pathlib.Path = pathlib.Path("data/rs/")
def get_all_listings(self) -> List["Listing"]:
listing_paths = sorted(list(self.data_dir.glob("*/listing.json")))
@staticmethod
def get_all_listings(listing_paths: list[str]) -> List["Listing"]:
identifiers = []
for listing_path in listing_paths:
with open(listing_path) as f:
@ -294,5 +294,6 @@ class Listing:
if __name__ == "__main__":
    # Manual smoke test: load the listings found under data/rs and print the
    # floorplans of the first one.
    listing_paths = sorted(pathlib.Path("data/rs").glob("*/listing.json"))
    # get_all_listings is a staticmethod that takes the paths explicitly;
    # calling it without them raises TypeError.
    listings = Listing.get_all_listings(listing_paths)
    print(listings[0].list_floorplans())

View file

@ -20,18 +20,6 @@ steps_to_handlers = {
@click.group()
def cli():
pass
@cli.command()
@click.option(
'--district',
default=None,
help='Districts to scrape',
type=click.Choice(get_districts().keys(), case_sensitive=False),
multiple=True,
)
@click.option(
'--data-dir',
default=pathlib.Path("data/rs/"),
@ -43,7 +31,24 @@ def cli():
resolve_path=True,
),
)
def dump_listings(district: list[str], data_dir: str):
@click.pass_context
def cli(ctx, data_dir: str):
ctx.ensure_object(dict)
ctx.obj['data_dir'] = data_dir
pass
@cli.command()
@click.option(
'--district',
default=None,
help='Districts to scrape',
type=click.Choice(get_districts().keys(), case_sensitive=False),
multiple=True,
)
@click.pass_context
def dump_listings(ctx: click.core.Context, district: list[str]):
data_dir: str = ctx.obj['data_dir']
click.echo(
f'Running dump_listings for districts {district} and data dir {data_dir}'
)
@ -52,9 +57,12 @@ def dump_listings(district: list[str], data_dir: str):
@cli.command()
def dump_detail():
click.echo('Running dump_detail')
dump_detail_module.dump_detail()
@click.pass_context
def dump_detail(ctx: click.core.Context):
data_dir = ctx.obj['data_dir']
click.echo(f'Running dump_detail for listings stored in {data_dir}')
listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json")))
dump_detail_module.dump_detail(listing_paths)
@cli.command()