parameterize dump images step to work with custom data paths

This commit is contained in:
Viktor Barzin 2025-05-14 21:01:58 +00:00
parent e424361ed9
commit 07fef7fbab
No known key found for this signature in database
GPG key ID: 4056458DBDBF8863
3 changed files with 19 additions and 9 deletions

View file

@@ -1,11 +1,12 @@
import json
import pathlib
from urllib.request import urlretrieve
from tqdm import tqdm
from data_access import Listing
def dump_images():
for listing in tqdm(Listing.get_all_listings()):
def dump_images(listing_paths: list[str]):
for listing in tqdm(Listing.get_all_listings(listing_paths)):
with open(listing.path_detail_json()) as f:
detail = json.load(f)
@@ -29,12 +30,13 @@ def dump_images():
tqdm.write(str(p))
try:
urlretrieve(url, p)
except:
tqdm.write(f"404 for {url}")
except Exception as e:
tqdm.write(f"Error for {url}: {e}")
def main():
dump_images()
listing_paths = sorted(list(pathlib.Path("data/rs").glob("*/listing.json")))
dump_images(listing_paths)
if __name__ == "__main__":