crawling for 3 and refactoring to allow incremental crawls
This commit is contained in:
parent
de2639f9c3
commit
36258d877f
5 changed files with 310 additions and 167 deletions
|
|
@ -1,22 +1,21 @@
|
|||
from rec.query import listing_query
|
||||
import pathlib
|
||||
import json
|
||||
from data_access import Listing
|
||||
|
||||
d = listing_query(1, 1, 2, 15, 0, 800000)
|
||||
d = listing_query(1, 3, 3, 15, 0, 800000)
|
||||
folder = pathlib.Path("data/rs/")
|
||||
|
||||
for i in range(1, 10000):
|
||||
try:
|
||||
print(f"page {i}")
|
||||
d = listing_query(i, 1, 2, 15, 0, 800000)
|
||||
d = listing_query(i, 3, 3, 15, 0, 800000)
|
||||
except:
|
||||
break
|
||||
|
||||
for property in d['properties']:
|
||||
identifier = property['identifier']
|
||||
listing_folder = folder / str(identifier)
|
||||
listing_folder.mkdir(exist_ok=True, parents=True)
|
||||
listing_path = listing_folder / f"listing.json"
|
||||
with open(listing_path, 'w') as f:
|
||||
|
||||
listing = Listing(identifier)
|
||||
with open(listing.path_listing_json(), 'w') as f:
|
||||
json.dump(property, f)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue