[3/n] click-ify: add the dump_images command

Run with:
poetry run python main.py --step dump_images
This commit is contained in:
Viktor Barzin 2025-05-11 19:04:19 +00:00
parent c2196c15c1
commit 70e8ef9f95
No known key found for this signature in database
GPG key ID: 4056458DBDBF8863
2 changed files with 36 additions and 24 deletions

View file

@ -3,29 +3,39 @@ from urllib.request import urlretrieve
from tqdm import tqdm
from data_access import Listing
# Walk every listing and load its detail JSON into `detail`.
# NOTE(review): this appears to be the pre-change, module-level copy of the
# loop from the diff (it runs on import) — confirm before relying on it.
for listing in tqdm(Listing.get_all_listings()):
with open(listing.path_detail_json()) as f:
detail = json.load(f)
# Photo download is disabled; only this commented-out sketch remains.
# for photo in detail["property"]["photos"]:
# url = photo["maxSizeUrl"]
# picname = url.split("/")[-1]
# order = photo["order"]
# p = listing.path_pic_file(order, picname)
# if p.exists():
# continue
# tqdm.write(str(p))
# urlretrieve(url, p)
def dump_images():
    """Download every listing's floorplan images that are not on disk yet.

    For each listing from ``Listing.get_all_listings()`` the detail JSON is
    loaded and ``detail["property"]["floorplans"]`` is walked. Each entry's
    ``url`` is fetched with ``urlretrieve`` into the path returned by
    ``listing.path_floorplan_file(order, picname)``, where ``picname`` is the
    last path segment of the URL. Targets that already exist are skipped.

    Downloads are best-effort: a failed download is reported through
    ``tqdm.write`` and the loop moves on to the next image.
    """
    for listing in tqdm(Listing.get_all_listings()):
        with open(listing.path_detail_json()) as f:
            detail = json.load(f)

        for photo in detail["property"]["floorplans"]:
            url = photo["url"]
            picname = url.split("/")[-1]
            order = photo["order"]
            p = listing.path_floorplan_file(order, picname)
            if p.exists():
                continue

            # tqdm.write keeps the message from clobbering the progress bar.
            tqdm.write(str(p))
            try:
                urlretrieve(url, p)
            except Exception as err:
                # Catch Exception rather than everything: KeyboardInterrupt
                # and SystemExit must still be able to stop a long run.
                # Report the real error instead of assuming a 404.
                tqdm.write(f"failed to download {url}: {err!r}")
# for photo in detail["property"]["photos"]:
# url = photo["maxSizeUrl"]
# picname = url.split("/")[-1]
# order = photo["order"]
# p = listing.path_pic_file(order, picname)
# if p.exists():
# continue
# tqdm.write(str(p))
# urlretrieve(url, p)
# Fetch each floorplan listed in the detail JSON unless its target file
# already exists.
for photo in detail["property"]["floorplans"]:
url = photo["url"]
# The file name is the last path segment of the URL.
picname = url.split("/")[-1]
order = photo["order"]
p = listing.path_floorplan_file(order, picname)
# Skip files that are already present.
if p.exists():
continue
tqdm.write(str(p))
try:
urlretrieve(url, p)
# NOTE(review): a bare except also traps KeyboardInterrupt/SystemExit, and
# the message below says "404" whatever the actual failure was.
except:
tqdm.write(f"404 for {url}")
def main():
    """Script entry point: run the image dump."""
    dump_images()


# Only run when executed as a script, so importing this module has no
# side effects from this guard.
if __name__ == "__main__":
    main()

View file

@ -3,10 +3,12 @@ import importlib
# The step modules have names starting with a digit, which an `import`
# statement cannot reference, so they are loaded by name instead.
dump_listings_module = importlib.import_module('1_dump_listings')
dump_detail_module = importlib.import_module('2_dump_detail')
dump_images_module = importlib.import_module('3_dump_images')

# Step name -> function implementing that step.
# NOTE(review): presumably keyed by the value of the `--step` option — confirm.
steps_to_handlers = {
    'dump_listings': dump_listings_module.dump_listings,
    'dump_detail': dump_detail_module.dump_detail,
    'dump_images': dump_images_module.dump_images,
}