wrongmove/crawler/2_dump_detail.py
Viktor Barzin c2196c15c1
[2/n] click-ify - add 2_dump_detail command
run with
poetry run python main.py --step dump_detail
2025-05-11 19:02:23 +00:00

39 lines
1,006 B
Python

import json
from rec.query import detail_query
from tqdm import tqdm
from data_access import Listing
def dump_detail(incremental: bool = True) -> None:
    """Fetch the detail payload for selected listings and store it as JSON.

    Every listing returned by ``Listing.get_all_listings()`` is considered.
    Listings whose ``last_seen`` is ``None`` are always skipped. The remaining
    listings are selected according to ``incremental``:

    * ``incremental=True`` (default): only listings whose detail JSON file
      (``listing.path_detail_json()``) does not exist yet.
    * ``incremental=False``: only listings with ``last_seen <= 1``; existing
      files are overwritten.

    For each selected listing ``detail_query(listing.identifier)`` is called
    and the result is written to ``listing.path_detail_json()``. A failure for
    one listing is reported on stdout and does not stop the run.

    Args:
        incremental: Selection mode, see above.
    """
    pending = []
    for listing in Listing.get_all_listings():
        last_seen = listing.last_seen
        # last_seen was introduced later, so not all entries have it. An
        # entry without it is treated as no longer on the platform: skip it.
        if last_seen is None:
            continue
        if incremental:
            # Only fetch what has not been dumped before.
            wanted = not listing.path_detail_json().exists()
        else:
            # NOTE(review): the unit of last_seen is not visible from this
            # file; the `<= 1` threshold is kept exactly as it was.
            wanted = last_seen <= 1
        if wanted:
            pending.append(listing)

    for listing in tqdm(pending):
        try:
            detail = detail_query(listing.identifier)
            # Serialize before opening the target: opening with "w" truncates
            # the file, so a serialization error after that point would leave
            # an empty/partial file that incremental mode then treats as done.
            payload = json.dumps(detail)
            with open(listing.path_detail_json(), "w") as f:
                f.write(payload)
        except Exception as e:
            # Best-effort: report which listing failed and keep going.
            print(f"Failed to dump detail for listing {listing.identifier}: {e}")
def main() -> None:
    """Script entry point: run the detail dump with its default settings."""
    dump_detail()
# Run the dump only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()