parameterize dump images step to work with custom data paths
This commit is contained in:
parent
e424361ed9
commit
07fef7fbab
3 changed files with 19 additions and 9 deletions
|
|
@@ -1,11 +1,12 @@
|
|||
import json
|
||||
import pathlib
|
||||
from urllib.request import urlretrieve
|
||||
from tqdm import tqdm
|
||||
from data_access import Listing
|
||||
|
||||
|
||||
def dump_images():
|
||||
for listing in tqdm(Listing.get_all_listings()):
|
||||
def dump_images(listing_paths: list[str]):
|
||||
for listing in tqdm(Listing.get_all_listings(listing_paths)):
|
||||
with open(listing.path_detail_json()) as f:
|
||||
detail = json.load(f)
|
||||
|
||||
|
|
@@ -29,12 +30,13 @@ def dump_images():
|
|||
tqdm.write(str(p))
|
||||
try:
|
||||
urlretrieve(url, p)
|
||||
except:
|
||||
tqdm.write(f"404 for {url}")
|
||||
except Exception as e:
|
||||
tqdm.write(f"Error for {url}: {e}")
|
||||
|
||||
|
||||
def main():
|
||||
dump_images()
|
||||
listing_paths = sorted(list(pathlib.Path("data/rs").glob("*/listing.json")))
|
||||
dump_images(listing_paths)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue