1. get all listings 2. get all detail jsons 3. get all images 4. get all floorplans 5. detecting floorplans Also updating dependencies for huggingface etc.
42 lines
No EOL
1.3 KiB
Python
42 lines
No EOL
1.3 KiB
Python
import pathlib
|
|
import json
|
|
from urllib.request import urlretrieve
|
|
from tqdm import tqdm
|
|
|
|
def _download_images(entries, url_key, target_dir):
    """Download each entry's image into *target_dir*, skipping existing files.

    Args:
        entries: Iterable of dicts; each is read for ``url_key`` and 'order'.
        url_key: Key under which an entry stores its download URL.
        target_dir: ``pathlib.Path`` of the destination folder; it is created
            (with parents) right before the first download that needs it.
    """
    for entry in entries:
        url = entry[url_key]
        picname = url.split('/')[-1]  # last path segment of the URL
        order = entry['order']
        destination = target_dir / f'{order}_{picname}'
        if destination.exists():
            continue
        target_dir.mkdir(parents=True, exist_ok=True)
        tqdm.write(str(destination))
        # Download under a temporary name and rename once the transfer is
        # complete. Writing straight to the final name would leave a truncated
        # file behind after an interrupted download, and the exists() check
        # above would then skip it on every later run.
        partial = destination.with_name(destination.name + '.part')
        urlretrieve(url, partial)
        partial.replace(destination)


folder = pathlib.Path('data/rs/')
details = folder.glob('*/detail.json')

# The glob is materialised into a list so tqdm knows the total up front.
for detail_path in tqdm(list(details)):
    # JSON is read as UTF-8 explicitly instead of the locale's default encoding.
    with open(detail_path, encoding='utf-8') as f:
        detail = json.load(f)

    listing = detail['property']
    identifier = listing['identifier']
    rsfolder = folder / str(identifier)

    # NOTE(review): a missing or null 'photos' / 'floorplans' entry is treated
    # as "nothing to download" so one sparse listing does not abort the whole
    # batch - confirm this matches the detail.json schema.
    _download_images(listing.get('photos') or [], 'maxSizeUrl', rsfolder / 'pics')
    _download_images(listing.get('floorplans') or [], 'url', rsfolder / 'floorplans')
|
|
|
|
|