Rewriting 1_dump to enable crawling of more real estate
This commit is contained in:
parent
40285245d5
commit
5720e68547
2 changed files with 63 additions and 30 deletions
|
|
@ -1,21 +1,38 @@
|
|||
from rec.query import listing_query
|
||||
from rec.districts import get_districts
|
||||
import pathlib
|
||||
import json
|
||||
from data_access import Listing
|
||||
|
||||
d = listing_query(1, 3, 3, 15, 0, 800000, max_days_since_added=7)
|
||||
folder = pathlib.Path("data/rs/")
|
||||
districts = get_districts()
|
||||
|
||||
for i in range(1, 10000):
|
||||
try:
|
||||
print(f"page {i}")
|
||||
d = listing_query(i, 3, 3, 15, 0, 800000, max_days_since_added=1)
|
||||
except:
|
||||
break
|
||||
for district, locid in districts.items():
|
||||
print("#### District:", district)
|
||||
for i in range(1, 41):
|
||||
try:
|
||||
d = listing_query(
|
||||
page=i,
|
||||
min_bedrooms=1,
|
||||
max_bedrooms=3,
|
||||
radius=0,
|
||||
min_price=0,
|
||||
max_price=800000,
|
||||
location_id=locid,
|
||||
)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
break
|
||||
if i == 1:
|
||||
print("totalAvailableResults: ", d["totalAvailableResults"])
|
||||
if len(d["properties"]) == 0:
|
||||
break
|
||||
print(f"page {i}", end=", ", flush=True)
|
||||
|
||||
for property in d["properties"]:
|
||||
identifier = property["identifier"]
|
||||
for property in d["properties"]:
|
||||
identifier = property["identifier"]
|
||||
|
||||
listing = Listing(identifier)
|
||||
with open(listing.path_listing_json(), "w") as f:
|
||||
json.dump(property, f)
|
||||
listing = Listing(identifier)
|
||||
with open(listing.path_listing_json(), "w") as f:
|
||||
json.dump(property, f)
|
||||
print() # break line as we used end=, above.
|
||||
|
|
|
|||
|
|
@ -1,19 +1,35 @@
|
|||
- [ ] Partition the query further, as each listing query can only grab 1000 entries at most. If the query is too broad, it will fail afterwards.
|
||||
|
||||
- [ ] Update capability of the database
|
||||
- [ ] Check if the entry already exists in the database
|
||||
|
||||
|
||||
- [ ] Crawl single entry
|
||||
- [ ] Download pictures and map to database entry
|
||||
|
||||
|
||||
|
||||
|
||||
# Distance measuring
|
||||
- [ ] Use a route API to find the shortest path to apartments
|
||||
- [ ] Switch to distance matrix API. Evaluate if it's the right one.
|
||||
|
||||
# sqm measure
|
||||
- [ ] OCR to detect the area of the apartment
|
||||
|
||||
|
||||
- District: City of London, totalAvailableResults: 60
|
||||
- District: Greenwich, totalAvailableResults: 1371
|
||||
- District: Hillingdon, totalAvailableResults: 1026
|
||||
- District: Ealing, totalAvailableResults: 1736
|
||||
- District: Richmond upon Thames, totalAvailableResults: 819
|
||||
- District: Sutton, totalAvailableResults: 664
|
||||
- District: Wandsworth, totalAvailableResults: 1824
|
||||
- District: Camden, totalAvailableResults: 801
|
||||
- District: Enfield, totalAvailableResults: 1056
|
||||
- District: Croydon, totalAvailableResults: 1865
|
||||
- District: Hackney, totalAvailableResults: 840
|
||||
- District: Kingston upon Thames, totalAvailableResults: 685
|
||||
- District: Kensington and Chelsea, totalAvailableResults: 658
|
||||
- District: Bromley, totalAvailableResults: 1341
|
||||
- District: Brent, totalAvailableResults: 1332
|
||||
- District: Waltham Forest, totalAvailableResults: 763
|
||||
- District: Southwark, totalAvailableResults: 1460
|
||||
- District: Harrow, totalAvailableResults: 948
|
||||
- District: Lewisham, totalAvailableResults: 1192
|
||||
- District: Barnet, totalAvailableResults: 1683
|
||||
- District: Islington, totalAvailableResults: 766
|
||||
- District: Haringey, totalAvailableResults: 795
|
||||
- District: Lambeth, totalAvailableResults: 1626
|
||||
- District: Westminster, totalAvailableResults: 1130
|
||||
- District: Tower Hamlets, totalAvailableResults: 2213
|
||||
- District: Havering, totalAvailableResults: 863
|
||||
- District: Barking and Dagenham, totalAvailableResults: 485
|
||||
- District: Hammersmith and Fulham, totalAvailableResults: 1038
|
||||
- District: Bexley, totalAvailableResults: 803
|
||||
- District: Redbridge, totalAvailableResults: 720
|
||||
- District: Newham, totalAvailableResults: 1306
|
||||
- District: Merton, totalAvailableResults: 873
|
||||
- District: Hounslow, totalAvailableResults: 1096
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue