# wrongmove/crawler/3_dump_images.py
import json
import pathlib
from urllib.request import urlretrieve
from tqdm import tqdm
from data_access import Listing
def dump_images(listing_paths: list[pathlib.Path]) -> None:
    """Download floorplan images for every listing in *listing_paths*.

    For each listing, read its cached detail JSON and fetch each floorplan
    image that is not already on disk. (A photo-download loop previously
    existed here but was deliberately disabled; only floorplans are fetched.)

    Args:
        listing_paths: paths to per-listing ``listing.json`` files, passed
            through to ``Listing.get_all_listings``.
    """
    for listing in tqdm(Listing.get_all_listings(listing_paths)):
        with open(listing.path_detail_json()) as f:
            detail = json.load(f)
        for plan in detail["property"]["floorplans"]:
            url = plan["url"]
            picname = url.split("/")[-1]
            order = plan["order"]
            p = listing.path_floorplan_file(order, picname)
            if p.exists():
                # Already downloaded on a previous run; skip.
                continue
            tqdm.write(str(p))
            try:
                urlretrieve(url, p)
            except Exception as e:
                # Best-effort crawl: log and continue. Remove any partially
                # written file so a re-run retries this URL instead of
                # skipping it via the p.exists() check above.
                pathlib.Path(p).unlink(missing_ok=True)
                tqdm.write(f"Error for {url}: {e}")
def main() -> None:
    """Collect every data/rs/*/listing.json path and download its images."""
    # sorted() already returns a list, so wrapping the glob in list() is redundant.
    listing_paths = sorted(pathlib.Path("data/rs").glob("*/listing.json"))
    dump_images(listing_paths)


if __name__ == "__main__":
    main()