45 lines
1.2 KiB
Python
45 lines
1.2 KiB
Python
import asyncio
|
|
import json
|
|
import pathlib
|
|
from rec.query import detail_query
|
|
from tqdm.asyncio import tqdm
|
|
|
|
from data_access import Listing
|
|
|
|
|
|
async def dump_detail(listing_paths: list[str]):
    """Run the per-listing detail dump concurrently for all given listings.

    Args:
        listing_paths: Paths handed to ``Listing.get_all_listings`` to load
            the listings to process.

    Returns:
        Whatever ``tqdm.gather`` yields for the per-listing coroutines
        (one entry per listing).
    """
    # Build one coroutine per listing up front; tqdm.gather awaits them all
    # while reporting progress.
    pending = [
        _dump_detail_for_listing(entry)
        for entry in Listing.get_all_listings(listing_paths)
    ]
    return await tqdm.gather(*pending)
|
|
|
|
|
|
async def _dump_detail_for_listing(listing: Listing, *, incremental: bool = True):
    """Fetch the detail record for one listing and write it to its detail JSON file.

    Args:
        listing: Listing to process; its ``last_seen``, ``identifier`` and
            ``path_detail_json()`` are read.
        incremental: Selects which skip rule applies (see below). Defaults to
            ``True``, the value that used to be hard-coded in the body.

    Returns:
        None. The result of ``detail_query`` is written to disk, not returned.

    A listing is skipped (nothing is fetched or written) when:
      * ``last_seen`` is ``None``;
      * ``incremental`` is false and ``last_seen <= 1``;
      * ``incremental`` is true and the detail JSON file does not exist.
    """
    # last_seen was introduced later, so not every entry has it. If it is
    # missing, the listing is not on the platform anymore, so skip it.
    last_seen = listing.last_seen
    if last_seen is None:
        return

    if not incremental and last_seen <= 1:
        return

    detail_path = listing.path_detail_json()
    # NOTE(review): in incremental mode this skips listings that have NO
    # detail file yet, i.e. only already-dumped listings are re-fetched.
    # That may be inverted relative to the usual meaning of "incremental";
    # behavior is kept as-is -- confirm the intent.
    if incremental and not detail_path.exists():
        return

    print('fetching', listing.identifier)

    detail = await detail_query(listing.identifier)
    # Explicit encoding so the output does not depend on the platform default.
    with open(detail_path, "w", encoding="utf-8") as f:
        json.dump(detail, f)
|
|
|
|
|
|
def main():
    """Dump details for every listing found under ``data/rs``.

    Collects all ``data/rs/*/listing.json`` paths in sorted order and runs
    ``dump_detail`` over them to completion.
    """
    listing_paths = sorted(pathlib.Path("data/rs").glob("*/listing.json"))
    # dump_detail is a coroutine function: calling it only creates a coroutine
    # object. It has to be driven by an event loop, otherwise nothing runs
    # (and Python warns that the coroutine was never awaited).
    asyncio.run(dump_detail(listing_paths))
|
|
|
|
|
|
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|