parameterize dump images step to work with custom data paths
This commit is contained in:
parent
e424361ed9
commit
07fef7fbab
3 changed files with 19 additions and 9 deletions
|
|
@ -1,11 +1,12 @@
|
||||||
import json
|
import json
|
||||||
|
import pathlib
|
||||||
from urllib.request import urlretrieve
|
from urllib.request import urlretrieve
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
from data_access import Listing
|
from data_access import Listing
|
||||||
|
|
||||||
|
|
||||||
def dump_images():
|
def dump_images(listing_paths: list[str]):
|
||||||
for listing in tqdm(Listing.get_all_listings()):
|
for listing in tqdm(Listing.get_all_listings(listing_paths)):
|
||||||
with open(listing.path_detail_json()) as f:
|
with open(listing.path_detail_json()) as f:
|
||||||
detail = json.load(f)
|
detail = json.load(f)
|
||||||
|
|
||||||
|
|
@ -29,12 +30,13 @@ def dump_images():
|
||||||
tqdm.write(str(p))
|
tqdm.write(str(p))
|
||||||
try:
|
try:
|
||||||
urlretrieve(url, p)
|
urlretrieve(url, p)
|
||||||
except:
|
except Exception as e:
|
||||||
tqdm.write(f"404 for {url}")
|
tqdm.write(f"Error for {url}: {e}")
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
dump_images()
|
listing_paths = sorted(list(pathlib.Path("data/rs").glob("*/listing.json")))
|
||||||
|
dump_images(listing_paths)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,12 @@ class Listing:
|
||||||
for listing_path in listing_paths:
|
for listing_path in listing_paths:
|
||||||
with open(listing_path) as f:
|
with open(listing_path) as f:
|
||||||
d = json.load(f)
|
d = json.load(f)
|
||||||
identifiers.append(Listing(d["identifier"]))
|
|
||||||
|
# data_dir is the first directory before the listing_path
|
||||||
|
data_dir = pathlib.Path(listing_path)
|
||||||
|
while str(d['identifier']) in str(data_dir.resolve().absolute()):
|
||||||
|
data_dir = data_dir.parent
|
||||||
|
identifiers.append(Listing(d["identifier"], data_dir=data_dir))
|
||||||
|
|
||||||
return identifiers
|
return identifiers
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -66,9 +66,12 @@ def dump_detail(ctx: click.core.Context):
|
||||||
|
|
||||||
|
|
||||||
@cli.command()
|
@cli.command()
|
||||||
def dump_images():
|
@click.pass_context
|
||||||
click.echo('Running dump_images')
|
def dump_images(ctx: click.core.Context):
|
||||||
dump_images_module.dump_images()
|
data_dir = ctx.obj['data_dir']
|
||||||
|
click.echo(f'Running dump_images stored in {data_dir}')
|
||||||
|
listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json")))
|
||||||
|
dump_images_module.dump_images(listing_paths)
|
||||||
|
|
||||||
|
|
||||||
@cli.command()
|
@cli.command()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue