Rewriting 1_dump to enable crawling of more real estate
This commit is contained in:
parent
40285245d5
commit
5720e68547
2 changed files with 63 additions and 30 deletions
|
|
@ -1,21 +1,38 @@
|
|||
from rec.query import listing_query
|
||||
from rec.districts import get_districts
|
||||
import pathlib
|
||||
import json
|
||||
from data_access import Listing
|
||||
|
||||
d = listing_query(1, 3, 3, 15, 0, 800000, max_days_since_added=7)
|
||||
folder = pathlib.Path("data/rs/")
|
||||
districts = get_districts()
|
||||
|
||||
for i in range(1, 10000):
|
||||
try:
|
||||
print(f"page {i}")
|
||||
d = listing_query(i, 3, 3, 15, 0, 800000, max_days_since_added=1)
|
||||
except:
|
||||
break
|
||||
for district, locid in districts.items():
|
||||
print("#### District:", district)
|
||||
for i in range(1, 41):
|
||||
try:
|
||||
d = listing_query(
|
||||
page=i,
|
||||
min_bedrooms=1,
|
||||
max_bedrooms=3,
|
||||
radius=0,
|
||||
min_price=0,
|
||||
max_price=800000,
|
||||
location_id=locid,
|
||||
)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
break
|
||||
if i == 1:
|
||||
print("totalAvailableResults: ", d["totalAvailableResults"])
|
||||
if len(d["properties"]) == 0:
|
||||
break
|
||||
print(f"page {i}", end=", ", flush=True)
|
||||
|
||||
for property in d["properties"]:
|
||||
identifier = property["identifier"]
|
||||
for property in d["properties"]:
|
||||
identifier = property["identifier"]
|
||||
|
||||
listing = Listing(identifier)
|
||||
with open(listing.path_listing_json(), "w") as f:
|
||||
json.dump(property, f)
|
||||
listing = Listing(identifier)
|
||||
with open(listing.path_listing_json(), "w") as f:
|
||||
json.dump(property, f)
|
||||
print() # break line as we used end=, above.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue