Dump images concurrently using aiohttp
This commit is contained in:
parent
01ac24b4b7
commit
68cc70bd11
2 changed files with 30 additions and 28 deletions
|
|
@@ -19,8 +19,7 @@ async def dump_detail(listing_paths: list[str]):
|
||||||
|
|
||||||
|
|
||||||
async def _dump_detail_for_listing(listing: Listing):
|
async def _dump_detail_for_listing(listing: Listing):
|
||||||
incremental = True
|
if listing.path_detail_json().exists():
|
||||||
if incremental and not listing.path_detail_json().exists():
|
|
||||||
return
|
return
|
||||||
|
|
||||||
# for listing in tqdm(filtered_listings):
|
# for listing in tqdm(filtered_listings):
|
||||||
|
|
|
||||||
|
|
@@ -1,37 +1,40 @@
|
||||||
|
import asyncio
|
||||||
import json
|
import json
|
||||||
import pathlib
|
import pathlib
|
||||||
from urllib.request import urlretrieve
|
import aiohttp
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
from data_access import Listing
|
from data_access import Listing
|
||||||
|
|
||||||
|
# Setting this too high either crashes rightmove or gets us blocked
|
||||||
|
semaphore = asyncio.Semaphore(10)
|
||||||
|
|
||||||
def dump_images(listing_paths: list[str]):
|
|
||||||
for listing in tqdm(Listing.get_all_listings(listing_paths)):
|
|
||||||
with open(listing.path_detail_json()) as f:
|
|
||||||
detail = json.load(f)
|
|
||||||
|
|
||||||
# for photo in detail["property"]["photos"]:
|
async def dump_images(listing_paths: list[str]):
|
||||||
# url = photo["maxSizeUrl"]
|
listings = Listing.get_all_listings(listing_paths)
|
||||||
# picname = url.split("/")[-1]
|
await tqdm.gather(*[dump_images_for_listing(listing) for listing in listings])
|
||||||
# order = photo["order"]
|
|
||||||
# p = listing.path_pic_file(order, picname)
|
|
||||||
# if p.exists():
|
|
||||||
# continue
|
|
||||||
# tqdm.write(str(p))
|
|
||||||
# urlretrieve(url, p)
|
|
||||||
|
|
||||||
for photo in detail["property"]["floorplans"]:
|
|
||||||
url = photo["url"]
|
async def dump_images_for_listing(listing: Listing):
|
||||||
picname = url.split("/")[-1]
|
with open(listing.path_detail_json()) as f:
|
||||||
order = photo["order"]
|
detail = json.load(f)
|
||||||
p = listing.path_floorplan_file(order, picname)
|
|
||||||
if p.exists():
|
for photo in detail["property"]["floorplans"]:
|
||||||
continue
|
url = photo["url"]
|
||||||
tqdm.write(str(p))
|
picname = url.split("/")[-1]
|
||||||
try:
|
order = photo["order"]
|
||||||
urlretrieve(url, p)
|
p = listing.path_floorplan_file(order, picname)
|
||||||
except Exception as e:
|
if p.exists():
|
||||||
tqdm.write(f"Error for {url}: {e}")
|
continue
|
||||||
|
try:
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
async with semaphore:
|
||||||
|
async with session.get(url) as response:
|
||||||
|
if response.status != 200:
|
||||||
|
raise Exception(f"Error for {url}: {response.status}")
|
||||||
|
with open(p, "wb") as f:
|
||||||
|
f.write(await response.read())
|
||||||
|
except Exception as e:
|
||||||
|
tqdm.write(f"Error for {url}: {e}")
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue