make dumping details async
This commit is contained in:
parent
ad879f2d4f
commit
3e7a144fb4
3 changed files with 27 additions and 23 deletions
|
|
@ -1,35 +1,39 @@
|
||||||
|
import asyncio
|
||||||
import json
|
import json
|
||||||
import pathlib
|
import pathlib
|
||||||
from rec.query import detail_query
|
from rec.query import detail_query
|
||||||
from tqdm import tqdm
|
from tqdm.asyncio import tqdm
|
||||||
|
|
||||||
from data_access import Listing
|
from data_access import Listing
|
||||||
|
|
||||||
|
|
||||||
def dump_detail(listing_paths: list[str]):
|
async def dump_detail(listing_paths: list[str]):
|
||||||
incremental = True
|
|
||||||
listings = Listing.get_all_listings(listing_paths)
|
listings = Listing.get_all_listings(listing_paths)
|
||||||
filtered_listings = []
|
filtered_listings = await tqdm.gather(
|
||||||
for listing in listings:
|
*[_dump_detail_for_listing(listing) for listing in listings]
|
||||||
# We introduced last_seen later, so not all entries have it.
|
)
|
||||||
# If it doesn't exist then it's not on the platform anymore. So skip.
|
return filtered_listings
|
||||||
last_seen = listing.last_seen
|
|
||||||
if last_seen is None:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if not incremental and last_seen <= 1:
|
|
||||||
filtered_listings.append(listing)
|
|
||||||
|
|
||||||
if incremental and not listing.path_detail_json().exists():
|
async def _dump_detail_for_listing(listing: Listing):
|
||||||
filtered_listings.append(listing)
|
incremental = True
|
||||||
|
# We introduced last_seen later, so not all entries have it.
|
||||||
|
# If it doesn't exist then it's not on the platform anymore. So skip.
|
||||||
|
last_seen = listing.last_seen
|
||||||
|
if last_seen is None:
|
||||||
|
return
|
||||||
|
|
||||||
for listing in tqdm(filtered_listings):
|
if not incremental and last_seen <= 1:
|
||||||
try:
|
return
|
||||||
d = detail_query(listing.identifier)
|
|
||||||
with open(listing.path_detail_json(), "w") as f:
|
if incremental and not listing.path_detail_json().exists():
|
||||||
json.dump(d, f)
|
return
|
||||||
except Exception as e:
|
print('fetching', listing.identifier)
|
||||||
print(e)
|
|
||||||
|
# for listing in tqdm(filtered_listings):
|
||||||
|
d = await detail_query(listing.identifier)
|
||||||
|
with open(listing.path_detail_json(), "w") as f:
|
||||||
|
json.dump(d, f)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
|
||||||
|
|
@ -117,7 +117,7 @@ def dump_details(ctx: click.core.Context):
|
||||||
data_dir = ctx.obj['data_dir']
|
data_dir = ctx.obj['data_dir']
|
||||||
click.echo(f'Running dump_detail for listings stored in {data_dir}')
|
click.echo(f'Running dump_detail for listings stored in {data_dir}')
|
||||||
listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json")))
|
listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json")))
|
||||||
dump_detail_module.dump_detail(listing_paths)
|
asyncio.run(dump_detail_module.dump_detail(listing_paths))
|
||||||
|
|
||||||
|
|
||||||
@cli.command()
|
@cli.command()
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,7 @@ class PropertyType(enum.StrEnum):
|
||||||
TERRACED = "terraced"
|
TERRACED = "terraced"
|
||||||
|
|
||||||
|
|
||||||
def detail_query(detail_id: int):
|
async def detail_query(detail_id: int):
|
||||||
params = {
|
params = {
|
||||||
"apiApplication": "ANDROID",
|
"apiApplication": "ANDROID",
|
||||||
"appVersion": "3.70.0",
|
"appVersion": "3.70.0",
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue