# wrongmove/crawler/3_dump_images.py

import pathlib
import json
from urllib.request import urlretrieve
from tqdm import tqdm
# Root folder holding one sub-folder per crawled property; each sub-folder
# is expected to contain a ``detail.json`` file.
folder = pathlib.Path('data/rs/')

# (key under detail['property'], field holding the image URL, target sub-folder)
IMAGE_KINDS = (
    ('photos', 'maxSizeUrl', 'pics'),
    ('floorplans', 'url', 'floorplans'),
)


def picture_filename(order, url):
    """Return the local file name for an image.

    The name is ``<order>_<last '/'-separated segment of url>``, which is
    the naming scheme used for files already on disk.
    """
    picname = url.split('/')[-1]
    return f'{order}_{picname}'


def download_atomically(url, destination):
    """Download *url* to the ``pathlib.Path`` *destination*.

    The data is first written to a sibling ``<name>.part`` file and only
    renamed to *destination* once the download has finished. An interrupted
    or failed download therefore never leaves a truncated file at
    *destination*, which a later run would otherwise mistake for a complete
    image and skip.

    Any exception raised by ``urlretrieve`` propagates to the caller after
    the partial file has been removed.
    """
    # Create the target folder (e.g. 'pics' or 'floorplans') on demand.
    destination.parent.mkdir(parents=True, exist_ok=True)
    partial = destination.with_name(destination.name + '.part')
    try:
        urlretrieve(url, partial)
        partial.replace(destination)
    finally:
        # After a successful rename the partial file no longer exists;
        # after a failure this removes whatever was written so far.
        if partial.exists():
            partial.unlink()


def main():
    """Download every photo and floorplan referenced by the detail files.

    For each ``<folder>/*/detail.json`` the images are stored under
    ``<folder>/<identifier>/pics`` and ``<folder>/<identifier>/floorplans``.
    Images whose target file already exists are skipped, so the script can
    be re-run to resume an earlier crawl.
    """
    details = folder.glob('*/detail.json')
    # Materialise the glob so tqdm knows the total and can show progress.
    for detail_path in tqdm(list(details)):
        with open(detail_path, encoding='utf-8') as f:
            detail = json.load(f)
        prop = detail['property']
        rsfolder = folder / str(prop['identifier'])
        for key, url_field, subfolder in IMAGE_KINDS:
            for image in prop[key]:
                url = image[url_field]
                filename = picture_filename(image['order'], url)
                fullpicpath = rsfolder / subfolder / filename
                if fullpicpath.exists():
                    continue
                # tqdm.write keeps the progress bar intact while logging.
                tqdm.write(str(fullpicpath))
                download_atomically(url, fullpicpath)


if __name__ == '__main__':
    main()