From 5305451fe8f6e2cdcf7341b901c45066addd50b1 Mon Sep 17 00:00:00 2001 From: Kadir Date: Mon, 1 Apr 2024 20:28:37 +0200 Subject: [PATCH] Change detail downloads to prefilter listings first, front-loading the existence checks and making the progress bar more accurate --- crawler/2_dump_detail.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/crawler/2_dump_detail.py b/crawler/2_dump_detail.py index 5b8bbd9..49ed4f3 100644 --- a/crawler/2_dump_detail.py +++ b/crawler/2_dump_detail.py @@ -4,11 +4,13 @@ from tqdm import tqdm from data_access import Listing -counter = 0 -for listing in tqdm(Listing.get_all_listings()): - if listing.path_detail_json().exists(): - continue - counter+=1 +listings = Listing.get_all_listings() +filtered_listings = [] +for listing in listings: + if not listing.path_detail_json().exists(): + filtered_listings.append(listing) + +for listing in tqdm(filtered_listings): try: d = detail_query(listing.identifier) with open(listing.path_detail_json(), "w") as f: @@ -16,5 +18,3 @@ for listing in tqdm(Listing.get_all_listings()): except: print("Failed at: ", listing.identifier) raise - -print('crawled new: ', counter) \ No newline at end of file