crawling for 3 and refactoring to allow incremental crawls
This commit is contained in:
parent
de2639f9c3
commit
36258d877f
5 changed files with 310 additions and 167 deletions
|
|
@ -1,25 +1,17 @@
|
|||
import pathlib
|
||||
import json
|
||||
from rec.query import detail_query
|
||||
from tqdm import tqdm
|
||||
|
||||
folder = pathlib.Path('data/rs/')
|
||||
listings = folder.glob('*/listing.json')
|
||||
from data_access import Listing
|
||||
|
||||
for listing_path in listings:
|
||||
with open(listing_path) as f:
|
||||
listing = json.load(f)
|
||||
identifier = listing['identifier']
|
||||
for listing in tqdm(Listing.get_all_listings()):
|
||||
if listing.path_detail_json().exists():
|
||||
continue
|
||||
|
||||
try:
|
||||
d = detail_query(identifier)
|
||||
d = detail_query(listing.identifier)
|
||||
with open(listing.path_detail_json(), 'w') as f:
|
||||
json.dump(d, f)
|
||||
except:
|
||||
print('Failed at: ', identifier)
|
||||
print('Failed at: ', listing.identifier)
|
||||
raise
|
||||
print(identifier)
|
||||
|
||||
detail_path = pathlib.Path(f'data/rs/{identifier}/detail.json')
|
||||
with open(detail_path, 'w') as f:
|
||||
json.dump(d, f)
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue