Parameterize dump_detail to use a custom data dir, and move the data dir param into the click context
This commit is contained in:
parent
48d379567b
commit
e424361ed9
3 changed files with 32 additions and 21 deletions
|
|
@ -1,13 +1,14 @@
|
||||||
import json
|
import json
|
||||||
|
import pathlib
|
||||||
from rec.query import detail_query
|
from rec.query import detail_query
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from data_access import Listing
|
from data_access import Listing
|
||||||
|
|
||||||
|
|
||||||
def dump_detail():
|
def dump_detail(listing_paths: list[str]):
|
||||||
incremental = True
|
incremental = True
|
||||||
listings = Listing.get_all_listings()
|
listings = Listing.get_all_listings(listing_paths)
|
||||||
filtered_listings = []
|
filtered_listings = []
|
||||||
for listing in listings:
|
for listing in listings:
|
||||||
# We introduced last_seen later, so not all entries have it.
|
# We introduced last_seen later, so not all entries have it.
|
||||||
|
|
@ -32,7 +33,8 @@ def dump_detail():
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
dump_detail()
|
listing_paths = sorted(list(pathlib.Path("data/rs").glob("*/listing.json")))
|
||||||
|
dump_detail(listing_paths)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
|
|
@ -13,8 +13,8 @@ class Listing:
|
||||||
_cached: Dict = None
|
_cached: Dict = None
|
||||||
data_dir: pathlib.Path = pathlib.Path("data/rs/")
|
data_dir: pathlib.Path = pathlib.Path("data/rs/")
|
||||||
|
|
||||||
def get_all_listings(self) -> List["Listing"]:
|
@staticmethod
|
||||||
listing_paths = sorted(list(self.data_dir.glob("*/listing.json")))
|
def get_all_listings(listing_paths: list[str]) -> List["Listing"]:
|
||||||
identifiers = []
|
identifiers = []
|
||||||
for listing_path in listing_paths:
|
for listing_path in listing_paths:
|
||||||
with open(listing_path) as f:
|
with open(listing_path) as f:
|
||||||
|
|
@ -294,5 +294,6 @@ class Listing:
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
listing_paths = sorted(list(pathlib.Path("data/rs").glob("*/listing.json")))
|
||||||
listings = Listing.get_all_listings()
|
listings = Listing.get_all_listings()
|
||||||
print(listings[0].list_floorplans())
|
print(listings[0].list_floorplans())
|
||||||
|
|
|
||||||
|
|
@ -20,18 +20,6 @@ steps_to_handlers = {
|
||||||
|
|
||||||
|
|
||||||
@click.group()
|
@click.group()
|
||||||
def cli():
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
@cli.command()
|
|
||||||
@click.option(
|
|
||||||
'--district',
|
|
||||||
default=None,
|
|
||||||
help='Districts to scrape',
|
|
||||||
type=click.Choice(get_districts().keys(), case_sensitive=False),
|
|
||||||
multiple=True,
|
|
||||||
)
|
|
||||||
@click.option(
|
@click.option(
|
||||||
'--data-dir',
|
'--data-dir',
|
||||||
default=pathlib.Path("data/rs/"),
|
default=pathlib.Path("data/rs/"),
|
||||||
|
|
@ -43,7 +31,24 @@ def cli():
|
||||||
resolve_path=True,
|
resolve_path=True,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
def dump_listings(district: list[str], data_dir: str):
|
@click.pass_context
|
||||||
|
def cli(ctx, data_dir: str):
|
||||||
|
ctx.ensure_object(dict)
|
||||||
|
ctx.obj['data_dir'] = data_dir
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command()
|
||||||
|
@click.option(
|
||||||
|
'--district',
|
||||||
|
default=None,
|
||||||
|
help='Districts to scrape',
|
||||||
|
type=click.Choice(get_districts().keys(), case_sensitive=False),
|
||||||
|
multiple=True,
|
||||||
|
)
|
||||||
|
@click.pass_context
|
||||||
|
def dump_listings(ctx: click.core.Context, district: list[str]):
|
||||||
|
data_dir: str = ctx.obj['data_dir']
|
||||||
click.echo(
|
click.echo(
|
||||||
f'Running dump_listings for districts {district} and data dir {data_dir}'
|
f'Running dump_listings for districts {district} and data dir {data_dir}'
|
||||||
)
|
)
|
||||||
|
|
@ -52,9 +57,12 @@ def dump_listings(district: list[str], data_dir: str):
|
||||||
|
|
||||||
|
|
||||||
@cli.command()
|
@cli.command()
|
||||||
def dump_detail():
|
@click.pass_context
|
||||||
click.echo('Running dump_detail')
|
def dump_detail(ctx: click.core.Context):
|
||||||
dump_detail_module.dump_detail()
|
data_dir = ctx.obj['data_dir']
|
||||||
|
click.echo(f'Running dump_detail for listings stored in {data_dir}')
|
||||||
|
listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json")))
|
||||||
|
dump_detail_module.dump_detail(listing_paths)
|
||||||
|
|
||||||
|
|
||||||
@cli.command()
|
@cli.command()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue