crawling for 3 and refactoring to allow incremental crawls

This commit is contained in:
Kadir 2024-03-11 14:43:53 +00:00
parent de2639f9c3
commit 36258d877f
5 changed files with 310 additions and 167 deletions

View file

@@ -1,22 +1,21 @@
from rec.query import listing_query
import pathlib
import json
from data_access import Listing
# Crawl paginated listing results and store each property as JSON on disk.
#
# Pages are requested one after another until the query raises or the page
# limit is reached; every entry under the response's 'properties' key is
# dumped to the path supplied by its Listing object.

# Positional arguments passed to listing_query after the page number.
# NOTE(review): their meaning is defined in rec.query, not in this file --
# confirm there before changing them.
QUERY_ARGS = (3, 3, 15, 0, 800000)

# Exclusive upper bound for the page counter (pages 1..9999 are requested).
PAGE_LIMIT = 10000

folder = pathlib.Path("data/rs/")

for page in range(1, PAGE_LIMIT):
    print(f"page {page}")
    try:
        d = listing_query(page, *QUERY_ARGS)
    except Exception as exc:
        # A failing query is treated as the end of the crawl.  Catching
        # Exception (not a bare except) lets Ctrl-C / SystemExit propagate,
        # and the reason is reported instead of being silently dropped.
        # NOTE(review): presumably the query raises once pages run out --
        # verify against rec.query.
        print(f"stopping at page {page}: {exc!r}")
        break

    for prop in d['properties']:
        identifier = prop['identifier']
        listing = Listing(identifier)
        listing_path = pathlib.Path(listing.path_listing_json())
        # Whether Listing creates its own directory is not visible from
        # here; this mkdir is idempotent, so it is safe either way.
        listing_path.parent.mkdir(parents=True, exist_ok=True)
        with open(listing_path, 'w') as f:
            json.dump(prop, f)