Rewriting 1_dump to enable crawling of more real estate
This commit is contained in:
parent
40285245d5
commit
5720e68547
2 changed files with 63 additions and 30 deletions
|
|
@ -1,21 +1,38 @@
|
||||||
from rec.query import listing_query
|
from rec.query import listing_query
|
||||||
|
from rec.districts import get_districts
|
||||||
import pathlib
|
import pathlib
|
||||||
import json
|
import json
|
||||||
from data_access import Listing
|
from data_access import Listing
|
||||||
|
|
||||||
d = listing_query(1, 3, 3, 15, 0, 800000, max_days_since_added=7)
|
|
||||||
folder = pathlib.Path("data/rs/")
|
folder = pathlib.Path("data/rs/")
|
||||||
|
districts = get_districts()
|
||||||
|
|
||||||
for i in range(1, 10000):
|
for district, locid in districts.items():
|
||||||
try:
|
print("#### District:", district)
|
||||||
print(f"page {i}")
|
for i in range(1, 41):
|
||||||
d = listing_query(i, 3, 3, 15, 0, 800000, max_days_since_added=1)
|
try:
|
||||||
except:
|
d = listing_query(
|
||||||
break
|
page=i,
|
||||||
|
min_bedrooms=1,
|
||||||
|
max_bedrooms=3,
|
||||||
|
radius=0,
|
||||||
|
min_price=0,
|
||||||
|
max_price=800000,
|
||||||
|
location_id=locid,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
print(e)
|
||||||
|
break
|
||||||
|
if i == 1:
|
||||||
|
print("totalAvailableResults: ", d["totalAvailableResults"])
|
||||||
|
if len(d["properties"]) == 0:
|
||||||
|
break
|
||||||
|
print(f"page {i}", end=", ", flush=True)
|
||||||
|
|
||||||
for property in d["properties"]:
|
for property in d["properties"]:
|
||||||
identifier = property["identifier"]
|
identifier = property["identifier"]
|
||||||
|
|
||||||
listing = Listing(identifier)
|
listing = Listing(identifier)
|
||||||
with open(listing.path_listing_json(), "w") as f:
|
with open(listing.path_listing_json(), "w") as f:
|
||||||
json.dump(property, f)
|
json.dump(property, f)
|
||||||
|
print() # break line as we used end=, above.
|
||||||
|
|
|
||||||
|
|
@ -1,19 +1,35 @@
|
||||||
|
- [ ] Partition the query further, as each listing query can only grab 1000 entries at most. If the query is too broad, it will fail after that limit is reached.
|
||||||
|
|
||||||
- [ ] Update capability of the database
|
- District: City of London, totalAvailableResults: 60
|
||||||
- [ ] Check if the entry already exists in the database
|
- District: Greenwich, totalAvailableResults: 1371
|
||||||
|
- District: Hillingdon, totalAvailableResults: 1026
|
||||||
|
- District: Ealing, totalAvailableResults: 1736
|
||||||
- [ ] Crawl single entry
|
- District: Richmond upon Thames, totalAvailableResults: 819
|
||||||
- [ ] Download pictures and map to database entry
|
- District: Sutton, totalAvailableResults: 664
|
||||||
|
- District: Wandsworth, totalAvailableResults: 1824
|
||||||
|
- District: Camden, totalAvailableResults: 801
|
||||||
|
- District: Enfield, totalAvailableResults: 1056
|
||||||
|
- District: Croydon, totalAvailableResults: 1865
|
||||||
# Distance measuring
|
- District: Hackney, totalAvailableResults: 840
|
||||||
- [ ] route api to find shortest path apartments
|
- District: Kingston upon Thames, totalAvailableResults: 685
|
||||||
- [ ] Switch to the distance matrix API. Evaluate whether it's the right one.
|
- District: Kensington and Chelsea, totalAvailableResults: 658
|
||||||
|
- District: Bromley, totalAvailableResults: 1341
|
||||||
# sqm measure
|
- District: Brent, totalAvailableResults: 1332
|
||||||
- [ ] OCR to detect the area of the apartment
|
- District: Waltham Forest, totalAvailableResults: 763
|
||||||
|
- District: Southwark, totalAvailableResults: 1460
|
||||||
|
- District: Harrow, totalAvailableResults: 948
|
||||||
|
- District: Lewisham, totalAvailableResults: 1192
|
||||||
|
- District: Barnet, totalAvailableResults: 1683
|
||||||
|
- District: Islington, totalAvailableResults: 766
|
||||||
|
- District: Haringey, totalAvailableResults: 795
|
||||||
|
- District: Lambeth, totalAvailableResults: 1626
|
||||||
|
- District: Westminster, totalAvailableResults: 1130
|
||||||
|
- District: Tower Hamlets, totalAvailableResults: 2213
|
||||||
|
- District: Havering, totalAvailableResults: 863
|
||||||
|
- District: Barking and Dagenham, totalAvailableResults: 485
|
||||||
|
- District: Hammersmith and Fulham, totalAvailableResults: 1038
|
||||||
|
- District: Bexley, totalAvailableResults: 803
|
||||||
|
- District: Redbridge, totalAvailableResults: 720
|
||||||
|
- District: Newham, totalAvailableResults: 1306
|
||||||
|
- District: Merton, totalAvailableResults: 873
|
||||||
|
- District: Hounslow, totalAvailableResults: 1096
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue