Skip listings whose detail JSON already exists before starting the progress loop.

The pending listings are collected into a list first, so tqdm iterates only
those; the "crawled new" counter and its final print are removed.

NOTE(review): line breaks and indentation were reconstructed from a copy of
this patch that had been collapsed onto a single line (4-space indents assumed,
blank context lines inferred from the hunk line counts) - verify against the
original commit before applying.

diff --git a/crawler/2_dump_detail.py b/crawler/2_dump_detail.py
index 5b8bbd9..49ed4f3 100644
--- a/crawler/2_dump_detail.py
+++ b/crawler/2_dump_detail.py
@@ -4,11 +4,13 @@ from tqdm import tqdm
 from data_access import Listing
 
 
-counter = 0
-for listing in tqdm(Listing.get_all_listings()):
-    if listing.path_detail_json().exists():
-        continue
-    counter+=1
+listings = Listing.get_all_listings()
+filtered_listings = []
+for listing in listings:
+    if not listing.path_detail_json().exists():
+        filtered_listings.append(listing)
+
+for listing in tqdm(filtered_listings):
     try:
         d = detail_query(listing.identifier)
         with open(listing.path_detail_json(), "w") as f:
@@ -16,5 +18,3 @@ for listing in tqdm(Listing.get_all_listings()):
     except:
         print("Failed at: ", listing.identifier)
         raise
-
-print('crawled new: ', counter)
\ No newline at end of file