wrongmove/crawler/3_dump_images.py
Viktor Barzin 70e8ef9f95
[3/n] click-ify add dump images command
run with
poetry run python main.py --step dump_images
2025-05-11 19:04:19 +00:00

41 lines
1.1 KiB
Python

import json
from urllib.request import urlretrieve
from tqdm import tqdm
from data_access import Listing
def dump_images():
    """Download the floorplan images of every known listing.

    For each listing yielded by ``Listing.get_all_listings()`` the listing's
    detail JSON is loaded and every entry of
    ``detail["property"]["floorplans"]`` is fetched from its ``url`` into
    ``listing.path_floorplan_file(order, picname)``, where ``picname`` is the
    last path segment of the URL. Targets that already exist are skipped, so
    the command can be re-run to pick up where it left off.

    A failed download is reported through ``tqdm.write`` and does not abort
    the run; ``KeyboardInterrupt`` / ``SystemExit`` are allowed to propagate.
    """
    for listing in tqdm(Listing.get_all_listings()):
        with open(listing.path_detail_json()) as f:
            detail = json.load(f)

        # NOTE: downloading of regular photos is currently disabled; only
        # floorplans are fetched. The previous implementation is kept below.
        # for photo in detail["property"]["photos"]:
        #     url = photo["maxSizeUrl"]
        #     picname = url.split("/")[-1]
        #     order = photo["order"]
        #     p = listing.path_pic_file(order, picname)
        #     if p.exists():
        #         continue
        #     tqdm.write(str(p))
        #     urlretrieve(url, p)

        for floorplan in detail["property"]["floorplans"]:
            url = floorplan["url"]
            picname = url.split("/")[-1]
            order = floorplan["order"]
            p = listing.path_floorplan_file(order, picname)
            if p.exists():
                continue
            tqdm.write(str(p))
            try:
                urlretrieve(url, p)
            except Exception as err:
                # Best-effort per file: report the real cause (not always a
                # 404) and carry on. `Exception` rather than a bare `except`
                # so Ctrl-C still stops the run.
                tqdm.write(f"Failed to download {url}: {err}")
                # A download that died midway may leave a truncated file that
                # the exists() check above would skip forever; remove it.
                # Assumes `p` is a pathlib.Path (it is already used via
                # `.exists()`) -- TODO confirm against data_access.Listing.
                p.unlink(missing_ok=True)
def main():
    """Script entry point: run the image dump step."""
    dump_images()
# Only start the dump when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()