diff --git a/crawler/1_dump_listings.py b/crawler/1_dump_listings.py
index 80e79b2..24f5cb4 100644
--- a/crawler/1_dump_listings.py
+++ b/crawler/1_dump_listings.py
@@ -1,22 +1,21 @@
from rec.query import listing_query
import pathlib
import json
+from data_access import Listing
-d = listing_query(1, 1, 2, 15, 0, 800000)
+d = listing_query(1, 3, 3, 15, 0, 800000)
folder = pathlib.Path("data/rs/")
for i in range(1, 10000):
try:
print(f"page {i}")
- d = listing_query(i, 1, 2, 15, 0, 800000)
+ d = listing_query(i, 3, 3, 15, 0, 800000)
except:
break
for property in d['properties']:
identifier = property['identifier']
- listing_folder = folder / str(identifier)
- listing_folder.mkdir(exist_ok=True, parents=True)
- listing_path = listing_folder / f"listing.json"
- with open(listing_path, 'w') as f:
+
+ listing = Listing(identifier)
+ with open(listing.path_listing_json(), 'w') as f:
json.dump(property, f)
-
diff --git a/crawler/2_dump_detail.py b/crawler/2_dump_detail.py
index 22b61af..1bd9247 100644
--- a/crawler/2_dump_detail.py
+++ b/crawler/2_dump_detail.py
@@ -1,25 +1,17 @@
-import pathlib
import json
from rec.query import detail_query
+from tqdm import tqdm
-folder = pathlib.Path('data/rs/')
-listings = folder.glob('*/listing.json')
+from data_access import Listing
-for listing_path in listings:
- with open(listing_path) as f:
- listing = json.load(f)
- identifier = listing['identifier']
+for listing in tqdm(Listing.get_all_listings()):
+ if listing.path_detail_json().exists():
+ continue
+
try:
- d = detail_query(identifier)
+ d = detail_query(listing.identifier)
+ with open(listing.path_detail_json(), 'w') as f:
+ json.dump(d, f)
except:
- print('Failed at: ', identifier)
+ print('Failed at: ', listing.identifier)
raise
- print(identifier)
-
- detail_path = pathlib.Path(f'data/rs/{identifier}/detail.json')
- with open(detail_path, 'w') as f:
- json.dump(d, f)
-
-
-
-
diff --git a/crawler/3_dump_images.py b/crawler/3_dump_images.py
index 7a8a2c6..51defa5 100644
--- a/crawler/3_dump_images.py
+++ b/crawler/3_dump_images.py
@@ -2,41 +2,31 @@ import pathlib
import json
from urllib.request import urlretrieve
from tqdm import tqdm
+from data_access import Listing
-folder = pathlib.Path('data/rs/')
-details = folder.glob('*/detail.json')
-
-for detail_path in tqdm(list(details)):
-
- with open(detail_path) as f:
+for listing in tqdm(Listing.get_all_listings()):
+ with open(listing.path_detail_json()) as f:
detail = json.load(f)
- identifier = detail['property']['identifier']
- rsfolder = folder / str(identifier)
-
for photo in detail['property']['photos']:
url = photo['maxSizeUrl']
picname = url.split('/')[-1]
order = photo['order']
- filename = f'{order}_{picname}'
- fullpicpath = rsfolder / 'pics' / filename
- if fullpicpath.exists():
+ p = listing.path_pic_file(order, picname)
+ if p.exists():
continue
- fullpicpath.parent.mkdir(parents=True, exist_ok=True) # create the 'pics' folder
- tqdm.write(str(fullpicpath))
- urlretrieve(url, fullpicpath)
+ tqdm.write(str(p))
+ urlretrieve(url, p)
for photo in detail['property']['floorplans']:
url = photo['url']
picname = url.split('/')[-1]
order = photo['order']
- filename = f'{order}_{picname}'
- fullpicpath = rsfolder / 'floorplans' / filename
- if fullpicpath.exists():
+ p = listing.path_floorplan_file(order, picname)
+ if p.exists():
continue
- fullpicpath.parent.mkdir(parents=True, exist_ok=True) # create the 'floorplans' folder
- tqdm.write(str(fullpicpath))
- urlretrieve(url, fullpicpath)
+ tqdm.write(str(p))
+ urlretrieve(url, p)
\ No newline at end of file
diff --git a/crawler/data_access.py b/crawler/data_access.py
index 0804ddb..d3faaaa 100644
--- a/crawler/data_access.py
+++ b/crawler/data_access.py
@@ -10,6 +10,7 @@ _DATA_DIR = pathlib.Path('data/rs/')
class Listing():
identifier: int
_cached: Dict = None
+
@staticmethod
def get_all_listings() -> List['Listing']:
@@ -23,7 +24,9 @@ class Listing():
return identifiers
def path_listing(self) -> pathlib.Path:
- return _DATA_DIR / str(self.identifier)
+ p = _DATA_DIR / str(self.identifier)
+ p.mkdir(parents=True, exist_ok=True)
+ return p
def path_listing_json(self) -> pathlib.Path:
return self.path_listing() / 'listing.json'
@@ -40,15 +43,16 @@ class Listing():
def path_pic_folder(self) -> pathlib.Path:
return self.path_listing() / 'pics'
- def path_pic_folder(self, order, name) -> pathlib.Path:
+ def path_pic_file(self, order, name) -> pathlib.Path:
+ self.path_pic_folder().mkdir(parents=True, exist_ok=True)
return self.path_pic_folder() / f'{order}_{name}'
-
def path_floorplan_folder(self) -> pathlib.Path:
return self.path_listing() / 'floorplans'
def path_floorplan_file(self, order, name) -> pathlib.Path:
- return self.path_pic_folder() / f'{order}_{name}'
+ self.path_floorplan_folder().mkdir(parents=True, exist_ok=True)
+ return self.path_floorplan_folder() / f'{order}_{name}'
def list_floorplans(self):
images = list(self.path_floorplan_folder().glob('*'))
@@ -80,7 +84,10 @@ class Listing():
max_sqm = max([o['estimated_sqm'] for o in objs if o is None]) # filter out Nones
return max_sqm
- def calculate_sqm_ocr(self):
+ def calculate_sqm_ocr(self, recalculate=True):
+ if not recalculate and self.path_floorplan_ocr_json().exists():
+ return
+
objs = []
for floorplan_path in self.list_floorplans():
estimated_sqm, model_output = floorplan.calculate_ocr(floorplan_path)
@@ -129,12 +136,17 @@ class Listing():
return None
return self.price / self.sqm_ocr
+ @property
+ def bedrooms(self) -> int:
+ return self.detailobject['property']['bedrooms']
+
def dict_nicely(self):
return {
'sqm_ocr': self.sqm_ocr,
'price': self.price,
'price_per_sqm': self.price_per_sqm,
'url': self.url,
+ 'bedrooms': self.bedrooms,
}
diff --git a/crawler/exploration.ipynb b/crawler/exploration.ipynb
index c467c58..65bd0bb 100644
--- a/crawler/exploration.ipynb
+++ b/crawler/exploration.ipynb
@@ -71,6 +71,7 @@
"
price | \n",
" price_per_sqm | \n",
" url | \n",
+ " bedrooms | \n",
" \n",
" \n",
" \n",
@@ -80,6 +81,7 @@
" 350000.0 | \n",
" 44814.340589 | \n",
" https://www.rightmove.co.uk/properties/102360773 | \n",
+ " 1 | \n",
" \n",
" \n",
" | 1 | \n",
@@ -87,6 +89,7 @@
" 695000.0 | \n",
" 118197.278912 | \n",
" https://www.rightmove.co.uk/properties/115530848 | \n",
+ " 1 | \n",
"
\n",
" \n",
" | 2 | \n",
@@ -94,6 +97,7 @@
" 575000.0 | \n",
" 8881.680568 | \n",
" https://www.rightmove.co.uk/properties/117095606 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 3 | \n",
@@ -101,6 +105,7 @@
" 790000.0 | \n",
" 14962.121212 | \n",
" https://www.rightmove.co.uk/properties/118752221 | \n",
+ " 1 | \n",
"
\n",
" \n",
" | 4 | \n",
@@ -108,6 +113,7 @@
" 425000.0 | \n",
" 93201.754386 | \n",
" https://www.rightmove.co.uk/properties/119578451 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | ... | \n",
@@ -115,45 +121,51 @@
" ... | \n",
" ... | \n",
" ... | \n",
+ " ... | \n",
"
\n",
" \n",
- " | 1020 | \n",
+ " 1021 | \n",
" NaN | \n",
" 220000.0 | \n",
" NaN | \n",
" https://www.rightmove.co.uk/properties/86809926 | \n",
+ " 1 | \n",
"
\n",
" \n",
- " | 1021 | \n",
+ " 1022 | \n",
" 49.00 | \n",
" 450000.0 | \n",
" 9183.673469 | \n",
" https://www.rightmove.co.uk/properties/86811141 | \n",
+ " 1 | \n",
"
\n",
" \n",
- " | 1022 | \n",
+ " 1023 | \n",
" 58.20 | \n",
" 550000.0 | \n",
" 9450.171821 | \n",
" https://www.rightmove.co.uk/properties/86811177 | \n",
+ " 2 | \n",
"
\n",
" \n",
- " | 1023 | \n",
+ " 1024 | \n",
" 3.00 | \n",
" 475000.0 | \n",
" 158333.333333 | \n",
" https://www.rightmove.co.uk/properties/86812494 | \n",
+ " 1 | \n",
"
\n",
" \n",
- " | 1024 | \n",
+ " 1025 | \n",
" 81.60 | \n",
" 790000.0 | \n",
" 9681.372549 | \n",
" https://www.rightmove.co.uk/properties/86972726 | \n",
+ " 2 | \n",
"
\n",
" \n",
"\n",
- "1025 rows × 4 columns
\n",
+ "1026 rows × 5 columns
\n",
""
],
"text/plain": [
@@ -164,26 +176,26 @@
"3 52.80 790000.0 14962.121212 \n",
"4 4.56 425000.0 93201.754386 \n",
"... ... ... ... \n",
- "1020 NaN 220000.0 NaN \n",
- "1021 49.00 450000.0 9183.673469 \n",
- "1022 58.20 550000.0 9450.171821 \n",
- "1023 3.00 475000.0 158333.333333 \n",
- "1024 81.60 790000.0 9681.372549 \n",
+ "1021 NaN 220000.0 NaN \n",
+ "1022 49.00 450000.0 9183.673469 \n",
+ "1023 58.20 550000.0 9450.171821 \n",
+ "1024 3.00 475000.0 158333.333333 \n",
+ "1025 81.60 790000.0 9681.372549 \n",
"\n",
- " url \n",
- "0 https://www.rightmove.co.uk/properties/102360773 \n",
- "1 https://www.rightmove.co.uk/properties/115530848 \n",
- "2 https://www.rightmove.co.uk/properties/117095606 \n",
- "3 https://www.rightmove.co.uk/properties/118752221 \n",
- "4 https://www.rightmove.co.uk/properties/119578451 \n",
- "... ... \n",
- "1020 https://www.rightmove.co.uk/properties/86809926 \n",
- "1021 https://www.rightmove.co.uk/properties/86811141 \n",
- "1022 https://www.rightmove.co.uk/properties/86811177 \n",
- "1023 https://www.rightmove.co.uk/properties/86812494 \n",
- "1024 https://www.rightmove.co.uk/properties/86972726 \n",
+ " url bedrooms \n",
+ "0 https://www.rightmove.co.uk/properties/102360773 1 \n",
+ "1 https://www.rightmove.co.uk/properties/115530848 1 \n",
+ "2 https://www.rightmove.co.uk/properties/117095606 2 \n",
+ "3 https://www.rightmove.co.uk/properties/118752221 1 \n",
+ "4 https://www.rightmove.co.uk/properties/119578451 2 \n",
+ "... ... ... \n",
+ "1021 https://www.rightmove.co.uk/properties/86809926 1 \n",
+ "1022 https://www.rightmove.co.uk/properties/86811141 1 \n",
+ "1023 https://www.rightmove.co.uk/properties/86811177 2 \n",
+ "1024 https://www.rightmove.co.uk/properties/86812494 1 \n",
+ "1025 https://www.rightmove.co.uk/properties/86972726 2 \n",
"\n",
- "[1025 rows x 4 columns]"
+ "[1026 rows x 5 columns]"
]
},
"execution_count": 4,
@@ -198,7 +210,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 5,
"id": "99c5b304-3d13-466b-a9f5-83a5db6311b5",
"metadata": {},
"outputs": [
@@ -227,6 +239,7 @@
" price | \n",
" price_per_sqm | \n",
" url | \n",
+ " bedrooms | \n",
" \n",
" \n",
" \n",
@@ -236,6 +249,7 @@
" 550000.0 | \n",
" 7.418157 | \n",
" https://www.rightmove.co.uk/properties/145546538 | \n",
+ " 2 | \n",
" \n",
" \n",
" | 249 | \n",
@@ -243,6 +257,7 @@
" 725000.0 | \n",
" 33.180626 | \n",
" https://www.rightmove.co.uk/properties/140821736 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 177 | \n",
@@ -250,6 +265,7 @@
" 695000.0 | \n",
" 5054.545455 | \n",
" https://www.rightmove.co.uk/properties/139354259 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 352 | \n",
@@ -257,6 +273,7 @@
" 475000.0 | \n",
" 5555.555556 | \n",
" https://www.rightmove.co.uk/properties/142142348 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 36 | \n",
@@ -264,6 +281,7 @@
" 475000.0 | \n",
" 5729.794934 | \n",
" https://www.rightmove.co.uk/properties/128925950 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 700 | \n",
@@ -271,6 +289,7 @@
" 650000.0 | \n",
" 6435.643564 | \n",
" https://www.rightmove.co.uk/properties/144591572 | \n",
+ " 1 | \n",
"
\n",
" \n",
" | 823 | \n",
@@ -278,6 +297,7 @@
" 525000.0 | \n",
" 6529.850746 | \n",
" https://www.rightmove.co.uk/properties/145051769 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 214 | \n",
@@ -285,6 +305,7 @@
" 650000.0 | \n",
" 7103.048847 | \n",
" https://www.rightmove.co.uk/properties/140326307 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 171 | \n",
@@ -292,6 +313,7 @@
" 650000.0 | \n",
" 7135.016465 | \n",
" https://www.rightmove.co.uk/properties/139245428 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 598 | \n",
@@ -299,6 +321,7 @@
" 795000.0 | \n",
" 7266.910420 | \n",
" https://www.rightmove.co.uk/properties/144034655 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 146 | \n",
@@ -306,6 +329,7 @@
" 700000.0 | \n",
" 7464.278098 | \n",
" https://www.rightmove.co.uk/properties/138510395 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 711 | \n",
@@ -313,6 +337,7 @@
" 750000.0 | \n",
" 7476.821852 | \n",
" https://www.rightmove.co.uk/properties/144620303 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 592 | \n",
@@ -320,6 +345,7 @@
" 650000.0 | \n",
" 7515.319690 | \n",
" https://www.rightmove.co.uk/properties/143987669 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 624 | \n",
@@ -327,6 +353,7 @@
" 775000.0 | \n",
" 7646.768624 | \n",
" https://www.rightmove.co.uk/properties/144217922 | \n",
+ " 1 | \n",
"
\n",
" \n",
" | 55 | \n",
@@ -334,6 +361,7 @@
" 750000.0 | \n",
" 7684.426230 | \n",
" https://www.rightmove.co.uk/properties/132564737 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 851 | \n",
@@ -341,6 +369,7 @@
" 625000.0 | \n",
" 7715.096902 | \n",
" https://www.rightmove.co.uk/properties/145172504 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 356 | \n",
@@ -348,6 +377,7 @@
" 695000.0 | \n",
" 7808.988764 | \n",
" https://www.rightmove.co.uk/properties/142185623 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 748 | \n",
@@ -355,6 +385,7 @@
" 675000.0 | \n",
" 7857.974389 | \n",
" https://www.rightmove.co.uk/properties/144771281 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 204 | \n",
@@ -362,6 +393,7 @@
" 650000.0 | \n",
" 7888.349515 | \n",
" https://www.rightmove.co.uk/properties/140173319 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 654 | \n",
@@ -369,6 +401,7 @@
" 665000.0 | \n",
" 7980.319213 | \n",
" https://www.rightmove.co.uk/properties/144361100 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 514 | \n",
@@ -376,6 +409,7 @@
" 700000.0 | \n",
" 8000.000000 | \n",
" https://www.rightmove.co.uk/properties/143460365 | \n",
+ " 1 | \n",
"
\n",
" \n",
" | 762 | \n",
@@ -383,6 +417,7 @@
" 750000.0 | \n",
" 8073.196986 | \n",
" https://www.rightmove.co.uk/properties/144846725 | \n",
+ " 1 | \n",
"
\n",
" \n",
" | 963 | \n",
@@ -390,6 +425,7 @@
" 750000.0 | \n",
" 8073.196986 | \n",
" https://www.rightmove.co.uk/properties/145565252 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 938 | \n",
@@ -397,6 +433,7 @@
" 699000.0 | \n",
" 8118.466899 | \n",
" https://www.rightmove.co.uk/properties/145491137 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 403 | \n",
@@ -404,6 +441,7 @@
" 750000.0 | \n",
" 8278.145695 | \n",
" https://www.rightmove.co.uk/properties/142704416 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 302 | \n",
@@ -411,6 +449,7 @@
" 795000.0 | \n",
" 8520.900322 | \n",
" https://www.rightmove.co.uk/properties/141595433 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 671 | \n",
@@ -418,6 +457,7 @@
" 800000.0 | \n",
" 8577.248847 | \n",
" https://www.rightmove.co.uk/properties/144429140 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 349 | \n",
@@ -425,6 +465,7 @@
" 695000.0 | \n",
" 8626.039469 | \n",
" https://www.rightmove.co.uk/properties/142115918 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 224 | \n",
@@ -432,6 +473,7 @@
" 800000.0 | \n",
" 8879.023307 | \n",
" https://www.rightmove.co.uk/properties/140464481 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 740 | \n",
@@ -439,6 +481,7 @@
" 735000.0 | \n",
" 8996.328029 | \n",
" https://www.rightmove.co.uk/properties/144722414 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 326 | \n",
@@ -446,6 +489,7 @@
" 800000.0 | \n",
" 9163.802978 | \n",
" https://www.rightmove.co.uk/properties/141846023 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 319 | \n",
@@ -453,6 +497,7 @@
" 750000.0 | \n",
" 9194.556822 | \n",
" https://www.rightmove.co.uk/properties/141797357 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 558 | \n",
@@ -460,6 +505,7 @@
" 800000.0 | \n",
" 9219.776420 | \n",
" https://www.rightmove.co.uk/properties/143758763 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 712 | \n",
@@ -467,6 +513,7 @@
" 800000.0 | \n",
" 9269.988413 | \n",
" https://www.rightmove.co.uk/properties/144622157 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 473 | \n",
@@ -474,6 +521,7 @@
" 800000.0 | \n",
" 9302.325581 | \n",
" https://www.rightmove.co.uk/properties/143210102 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 105 | \n",
@@ -481,13 +529,15 @@
" 750000.0 | \n",
" 9328.358209 | \n",
" https://www.rightmove.co.uk/properties/136988726 | \n",
+ " 2 | \n",
"
\n",
" \n",
- " | 1002 | \n",
+ " 1003 | \n",
" 80.27 | \n",
" 750000.0 | \n",
" 9343.465803 | \n",
" https://www.rightmove.co.uk/properties/86775291 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 235 | \n",
@@ -495,6 +545,7 @@
" 775000.0 | \n",
" 9359.903382 | \n",
" https://www.rightmove.co.uk/properties/140611055 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 65 | \n",
@@ -502,6 +553,7 @@
" 800000.0 | \n",
" 9400.705053 | \n",
" https://www.rightmove.co.uk/properties/134116232 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 30 | \n",
@@ -509,13 +561,15 @@
" 795000.0 | \n",
" 9498.207885 | \n",
" https://www.rightmove.co.uk/properties/127787960 | \n",
+ " 1 | \n",
"
\n",
" \n",
- " | 1024 | \n",
+ " 1025 | \n",
" 81.60 | \n",
" 790000.0 | \n",
" 9681.372549 | \n",
" https://www.rightmove.co.uk/properties/86972726 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 88 | \n",
@@ -523,6 +577,7 @@
" 800000.0 | \n",
" 9785.932722 | \n",
" https://www.rightmove.co.uk/properties/136012193 | \n",
+ " 1 | \n",
"
\n",
" \n",
" | 454 | \n",
@@ -530,6 +585,7 @@
" 800000.0 | \n",
" 9900.990099 | \n",
" https://www.rightmove.co.uk/properties/143138867 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 343 | \n",
@@ -537,6 +593,7 @@
" 800000.0 | \n",
" 9920.634921 | \n",
" https://www.rightmove.co.uk/properties/142032935 | \n",
+ " 2 | \n",
"
\n",
" \n",
"\n",
@@ -580,63 +637,63 @@
"712 86.30 800000.0 9269.988413 \n",
"473 86.00 800000.0 9302.325581 \n",
"105 80.40 750000.0 9328.358209 \n",
- "1002 80.27 750000.0 9343.465803 \n",
+ "1003 80.27 750000.0 9343.465803 \n",
"235 82.80 775000.0 9359.903382 \n",
"65 85.10 800000.0 9400.705053 \n",
"30 83.70 795000.0 9498.207885 \n",
- "1024 81.60 790000.0 9681.372549 \n",
+ "1025 81.60 790000.0 9681.372549 \n",
"88 81.75 800000.0 9785.932722 \n",
"454 80.80 800000.0 9900.990099 \n",
"343 80.64 800000.0 9920.634921 \n",
"\n",
- " url \n",
- "953 https://www.rightmove.co.uk/properties/145546538 \n",
- "249 https://www.rightmove.co.uk/properties/140821736 \n",
- "177 https://www.rightmove.co.uk/properties/139354259 \n",
- "352 https://www.rightmove.co.uk/properties/142142348 \n",
- "36 https://www.rightmove.co.uk/properties/128925950 \n",
- "700 https://www.rightmove.co.uk/properties/144591572 \n",
- "823 https://www.rightmove.co.uk/properties/145051769 \n",
- "214 https://www.rightmove.co.uk/properties/140326307 \n",
- "171 https://www.rightmove.co.uk/properties/139245428 \n",
- "598 https://www.rightmove.co.uk/properties/144034655 \n",
- "146 https://www.rightmove.co.uk/properties/138510395 \n",
- "711 https://www.rightmove.co.uk/properties/144620303 \n",
- "592 https://www.rightmove.co.uk/properties/143987669 \n",
- "624 https://www.rightmove.co.uk/properties/144217922 \n",
- "55 https://www.rightmove.co.uk/properties/132564737 \n",
- "851 https://www.rightmove.co.uk/properties/145172504 \n",
- "356 https://www.rightmove.co.uk/properties/142185623 \n",
- "748 https://www.rightmove.co.uk/properties/144771281 \n",
- "204 https://www.rightmove.co.uk/properties/140173319 \n",
- "654 https://www.rightmove.co.uk/properties/144361100 \n",
- "514 https://www.rightmove.co.uk/properties/143460365 \n",
- "762 https://www.rightmove.co.uk/properties/144846725 \n",
- "963 https://www.rightmove.co.uk/properties/145565252 \n",
- "938 https://www.rightmove.co.uk/properties/145491137 \n",
- "403 https://www.rightmove.co.uk/properties/142704416 \n",
- "302 https://www.rightmove.co.uk/properties/141595433 \n",
- "671 https://www.rightmove.co.uk/properties/144429140 \n",
- "349 https://www.rightmove.co.uk/properties/142115918 \n",
- "224 https://www.rightmove.co.uk/properties/140464481 \n",
- "740 https://www.rightmove.co.uk/properties/144722414 \n",
- "326 https://www.rightmove.co.uk/properties/141846023 \n",
- "319 https://www.rightmove.co.uk/properties/141797357 \n",
- "558 https://www.rightmove.co.uk/properties/143758763 \n",
- "712 https://www.rightmove.co.uk/properties/144622157 \n",
- "473 https://www.rightmove.co.uk/properties/143210102 \n",
- "105 https://www.rightmove.co.uk/properties/136988726 \n",
- "1002 https://www.rightmove.co.uk/properties/86775291 \n",
- "235 https://www.rightmove.co.uk/properties/140611055 \n",
- "65 https://www.rightmove.co.uk/properties/134116232 \n",
- "30 https://www.rightmove.co.uk/properties/127787960 \n",
- "1024 https://www.rightmove.co.uk/properties/86972726 \n",
- "88 https://www.rightmove.co.uk/properties/136012193 \n",
- "454 https://www.rightmove.co.uk/properties/143138867 \n",
- "343 https://www.rightmove.co.uk/properties/142032935 "
+ " url bedrooms \n",
+ "953 https://www.rightmove.co.uk/properties/145546538 2 \n",
+ "249 https://www.rightmove.co.uk/properties/140821736 2 \n",
+ "177 https://www.rightmove.co.uk/properties/139354259 2 \n",
+ "352 https://www.rightmove.co.uk/properties/142142348 2 \n",
+ "36 https://www.rightmove.co.uk/properties/128925950 2 \n",
+ "700 https://www.rightmove.co.uk/properties/144591572 1 \n",
+ "823 https://www.rightmove.co.uk/properties/145051769 2 \n",
+ "214 https://www.rightmove.co.uk/properties/140326307 2 \n",
+ "171 https://www.rightmove.co.uk/properties/139245428 2 \n",
+ "598 https://www.rightmove.co.uk/properties/144034655 2 \n",
+ "146 https://www.rightmove.co.uk/properties/138510395 2 \n",
+ "711 https://www.rightmove.co.uk/properties/144620303 2 \n",
+ "592 https://www.rightmove.co.uk/properties/143987669 2 \n",
+ "624 https://www.rightmove.co.uk/properties/144217922 1 \n",
+ "55 https://www.rightmove.co.uk/properties/132564737 2 \n",
+ "851 https://www.rightmove.co.uk/properties/145172504 2 \n",
+ "356 https://www.rightmove.co.uk/properties/142185623 2 \n",
+ "748 https://www.rightmove.co.uk/properties/144771281 2 \n",
+ "204 https://www.rightmove.co.uk/properties/140173319 2 \n",
+ "654 https://www.rightmove.co.uk/properties/144361100 2 \n",
+ "514 https://www.rightmove.co.uk/properties/143460365 1 \n",
+ "762 https://www.rightmove.co.uk/properties/144846725 1 \n",
+ "963 https://www.rightmove.co.uk/properties/145565252 2 \n",
+ "938 https://www.rightmove.co.uk/properties/145491137 2 \n",
+ "403 https://www.rightmove.co.uk/properties/142704416 2 \n",
+ "302 https://www.rightmove.co.uk/properties/141595433 2 \n",
+ "671 https://www.rightmove.co.uk/properties/144429140 2 \n",
+ "349 https://www.rightmove.co.uk/properties/142115918 2 \n",
+ "224 https://www.rightmove.co.uk/properties/140464481 2 \n",
+ "740 https://www.rightmove.co.uk/properties/144722414 2 \n",
+ "326 https://www.rightmove.co.uk/properties/141846023 2 \n",
+ "319 https://www.rightmove.co.uk/properties/141797357 2 \n",
+ "558 https://www.rightmove.co.uk/properties/143758763 2 \n",
+ "712 https://www.rightmove.co.uk/properties/144622157 2 \n",
+ "473 https://www.rightmove.co.uk/properties/143210102 2 \n",
+ "105 https://www.rightmove.co.uk/properties/136988726 2 \n",
+ "1003 https://www.rightmove.co.uk/properties/86775291 2 \n",
+ "235 https://www.rightmove.co.uk/properties/140611055 2 \n",
+ "65 https://www.rightmove.co.uk/properties/134116232 2 \n",
+ "30 https://www.rightmove.co.uk/properties/127787960 1 \n",
+ "1025 https://www.rightmove.co.uk/properties/86972726 2 \n",
+ "88 https://www.rightmove.co.uk/properties/136012193 1 \n",
+ "454 https://www.rightmove.co.uk/properties/143138867 2 \n",
+ "343 https://www.rightmove.co.uk/properties/142032935 2 "
]
},
- "execution_count": 7,
+ "execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -648,7 +705,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 6,
"id": "52545cfa-0932-46fe-ba7e-961fd43f2786",
"metadata": {},
"outputs": [
@@ -677,6 +734,7 @@
" price | \n",
" price_per_sqm | \n",
" url | \n",
+ " bedrooms | \n",
" \n",
" \n",
" \n",
@@ -686,6 +744,7 @@
" 400000.0 | \n",
" 5449.591281 | \n",
" https://www.rightmove.co.uk/properties/142186991 | \n",
+ " 2 | \n",
" \n",
" \n",
" | 293 | \n",
@@ -693,6 +752,7 @@
" 425000.0 | \n",
" 5519.480519 | \n",
" https://www.rightmove.co.uk/properties/141437783 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 352 | \n",
@@ -700,6 +760,7 @@
" 475000.0 | \n",
" 5555.555556 | \n",
" https://www.rightmove.co.uk/properties/142142348 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 685 | \n",
@@ -707,6 +768,7 @@
" 425000.0 | \n",
" 5589.898724 | \n",
" https://www.rightmove.co.uk/properties/144494012 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 36 | \n",
@@ -714,6 +776,7 @@
" 475000.0 | \n",
" 5729.794934 | \n",
" https://www.rightmove.co.uk/properties/128925950 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 491 | \n",
@@ -721,13 +784,15 @@
" 450000.0 | \n",
" 6277.901786 | \n",
" https://www.rightmove.co.uk/properties/143315840 | \n",
+ " 2 | \n",
"
\n",
" \n",
- " | 1019 | \n",
+ " 1020 | \n",
" 73.67 | \n",
" 495000.0 | \n",
" 6719.152980 | \n",
" https://www.rightmove.co.uk/properties/86807916 | \n",
+ " 2 | \n",
"
\n",
" \n",
"\n",
@@ -741,19 +806,19 @@
"685 76.03 425000.0 5589.898724 \n",
"36 82.90 475000.0 5729.794934 \n",
"491 71.68 450000.0 6277.901786 \n",
- "1019 73.67 495000.0 6719.152980 \n",
+ "1020 73.67 495000.0 6719.152980 \n",
"\n",
- " url \n",
- "359 https://www.rightmove.co.uk/properties/142186991 \n",
- "293 https://www.rightmove.co.uk/properties/141437783 \n",
- "352 https://www.rightmove.co.uk/properties/142142348 \n",
- "685 https://www.rightmove.co.uk/properties/144494012 \n",
- "36 https://www.rightmove.co.uk/properties/128925950 \n",
- "491 https://www.rightmove.co.uk/properties/143315840 \n",
- "1019 https://www.rightmove.co.uk/properties/86807916 "
+ " url bedrooms \n",
+ "359 https://www.rightmove.co.uk/properties/142186991 2 \n",
+ "293 https://www.rightmove.co.uk/properties/141437783 2 \n",
+ "352 https://www.rightmove.co.uk/properties/142142348 2 \n",
+ "685 https://www.rightmove.co.uk/properties/144494012 2 \n",
+ "36 https://www.rightmove.co.uk/properties/128925950 2 \n",
+ "491 https://www.rightmove.co.uk/properties/143315840 2 \n",
+ "1020 https://www.rightmove.co.uk/properties/86807916 2 "
]
},
- "execution_count": 8,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -765,7 +830,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 7,
"id": "d0246926-13ef-4110-8e3a-fb676a55c2a6",
"metadata": {},
"outputs": [
@@ -794,6 +859,7 @@
" price | \n",
" price_per_sqm | \n",
" url | \n",
+ " bedrooms | \n",
" \n",
" \n",
" \n",
@@ -803,6 +869,7 @@
" 550000.0 | \n",
" 7.418157 | \n",
" https://www.rightmove.co.uk/properties/145546538 | \n",
+ " 2 | \n",
" \n",
" \n",
" | 823 | \n",
@@ -810,6 +877,7 @@
" 525000.0 | \n",
" 6529.850746 | \n",
" https://www.rightmove.co.uk/properties/145051769 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 492 | \n",
@@ -817,6 +885,7 @@
" 525000.0 | \n",
" 7104.194858 | \n",
" https://www.rightmove.co.uk/properties/143317361 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 561 | \n",
@@ -824,6 +893,7 @@
" 550000.0 | \n",
" 7170.795306 | \n",
" https://www.rightmove.co.uk/properties/143780789 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 69 | \n",
@@ -831,13 +901,15 @@
" 525000.0 | \n",
" 7181.942544 | \n",
" https://www.rightmove.co.uk/properties/134574563 | \n",
+ " 2 | \n",
"
\n",
" \n",
- " | 987 | \n",
+ " 988 | \n",
" 72.40 | \n",
" 525000.0 | \n",
" 7251.381215 | \n",
" https://www.rightmove.co.uk/properties/86648925 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 272 | \n",
@@ -845,6 +917,7 @@
" 575000.0 | \n",
" 7382.205675 | \n",
" https://www.rightmove.co.uk/properties/141131297 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 521 | \n",
@@ -852,6 +925,7 @@
" 525000.0 | \n",
" 7406.884876 | \n",
" https://www.rightmove.co.uk/properties/143514149 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 324 | \n",
@@ -859,13 +933,15 @@
" 575000.0 | \n",
" 7555.847569 | \n",
" https://www.rightmove.co.uk/properties/141831353 | \n",
+ " 2 | \n",
"
\n",
" \n",
- " | 1017 | \n",
+ " 1018 | \n",
" 71.80 | \n",
" 550000.0 | \n",
" 7660.167131 | \n",
" https://www.rightmove.co.uk/properties/86804832 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 526 | \n",
@@ -873,6 +949,7 @@
" 600000.0 | \n",
" 7692.307692 | \n",
" https://www.rightmove.co.uk/properties/143552156 | \n",
+ " 1 | \n",
"
\n",
" \n",
" | 817 | \n",
@@ -880,6 +957,7 @@
" 550000.0 | \n",
" 7727.975270 | \n",
" https://www.rightmove.co.uk/properties/145035929 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 843 | \n",
@@ -887,6 +965,7 @@
" 600000.0 | \n",
" 7741.935484 | \n",
" https://www.rightmove.co.uk/properties/145144988 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 337 | \n",
@@ -894,6 +973,7 @@
" 550000.0 | \n",
" 7790.368272 | \n",
" https://www.rightmove.co.uk/properties/141904286 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 233 | \n",
@@ -901,6 +981,7 @@
" 600000.0 | \n",
" 7926.023778 | \n",
" https://www.rightmove.co.uk/properties/140582213 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 763 | \n",
@@ -908,6 +989,7 @@
" 600000.0 | \n",
" 8000.000000 | \n",
" https://www.rightmove.co.uk/properties/144862070 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 315 | \n",
@@ -915,6 +997,7 @@
" 590000.0 | \n",
" 8274.894811 | \n",
" https://www.rightmove.co.uk/properties/141669686 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 899 | \n",
@@ -922,6 +1005,7 @@
" 595000.0 | \n",
" 8325.171401 | \n",
" https://www.rightmove.co.uk/properties/145362911 | \n",
+ " 2 | \n",
"
\n",
" \n",
" | 147 | \n",
@@ -929,13 +1013,15 @@
" 600000.0 | \n",
" 8391.608392 | \n",
" https://www.rightmove.co.uk/properties/138537527 | \n",
+ " 1 | \n",
"
\n",
" \n",
- " | 972 | \n",
+ " 973 | \n",
" 70.89 | \n",
" 595000.0 | \n",
" 8393.285372 | \n",
" https://www.rightmove.co.uk/properties/86296491 | \n",
+ " 2 | \n",
"
\n",
" \n",
"\n",
@@ -948,11 +1034,11 @@
"492 73.90 525000.0 7104.194858 \n",
"561 76.70 550000.0 7170.795306 \n",
"69 73.10 525000.0 7181.942544 \n",
- "987 72.40 525000.0 7251.381215 \n",
+ "988 72.40 525000.0 7251.381215 \n",
"272 77.89 575000.0 7382.205675 \n",
"521 70.88 525000.0 7406.884876 \n",
"324 76.10 575000.0 7555.847569 \n",
- "1017 71.80 550000.0 7660.167131 \n",
+ "1018 71.80 550000.0 7660.167131 \n",
"526 78.00 600000.0 7692.307692 \n",
"817 71.17 550000.0 7727.975270 \n",
"843 77.50 600000.0 7741.935484 \n",
@@ -962,32 +1048,32 @@
"315 71.30 590000.0 8274.894811 \n",
"899 71.47 595000.0 8325.171401 \n",
"147 71.50 600000.0 8391.608392 \n",
- "972 70.89 595000.0 8393.285372 \n",
+ "973 70.89 595000.0 8393.285372 \n",
"\n",
- " url \n",
- "953 https://www.rightmove.co.uk/properties/145546538 \n",
- "823 https://www.rightmove.co.uk/properties/145051769 \n",
- "492 https://www.rightmove.co.uk/properties/143317361 \n",
- "561 https://www.rightmove.co.uk/properties/143780789 \n",
- "69 https://www.rightmove.co.uk/properties/134574563 \n",
- "987 https://www.rightmove.co.uk/properties/86648925 \n",
- "272 https://www.rightmove.co.uk/properties/141131297 \n",
- "521 https://www.rightmove.co.uk/properties/143514149 \n",
- "324 https://www.rightmove.co.uk/properties/141831353 \n",
- "1017 https://www.rightmove.co.uk/properties/86804832 \n",
- "526 https://www.rightmove.co.uk/properties/143552156 \n",
- "817 https://www.rightmove.co.uk/properties/145035929 \n",
- "843 https://www.rightmove.co.uk/properties/145144988 \n",
- "337 https://www.rightmove.co.uk/properties/141904286 \n",
- "233 https://www.rightmove.co.uk/properties/140582213 \n",
- "763 https://www.rightmove.co.uk/properties/144862070 \n",
- "315 https://www.rightmove.co.uk/properties/141669686 \n",
- "899 https://www.rightmove.co.uk/properties/145362911 \n",
- "147 https://www.rightmove.co.uk/properties/138537527 \n",
- "972 https://www.rightmove.co.uk/properties/86296491 "
+ " url bedrooms \n",
+ "953 https://www.rightmove.co.uk/properties/145546538 2 \n",
+ "823 https://www.rightmove.co.uk/properties/145051769 2 \n",
+ "492 https://www.rightmove.co.uk/properties/143317361 2 \n",
+ "561 https://www.rightmove.co.uk/properties/143780789 2 \n",
+ "69 https://www.rightmove.co.uk/properties/134574563 2 \n",
+ "988 https://www.rightmove.co.uk/properties/86648925 2 \n",
+ "272 https://www.rightmove.co.uk/properties/141131297 2 \n",
+ "521 https://www.rightmove.co.uk/properties/143514149 2 \n",
+ "324 https://www.rightmove.co.uk/properties/141831353 2 \n",
+ "1018 https://www.rightmove.co.uk/properties/86804832 2 \n",
+ "526 https://www.rightmove.co.uk/properties/143552156 1 \n",
+ "817 https://www.rightmove.co.uk/properties/145035929 2 \n",
+ "843 https://www.rightmove.co.uk/properties/145144988 2 \n",
+ "337 https://www.rightmove.co.uk/properties/141904286 2 \n",
+ "233 https://www.rightmove.co.uk/properties/140582213 2 \n",
+ "763 https://www.rightmove.co.uk/properties/144862070 2 \n",
+ "315 https://www.rightmove.co.uk/properties/141669686 2 \n",
+ "899 https://www.rightmove.co.uk/properties/145362911 2 \n",
+ "147 https://www.rightmove.co.uk/properties/138537527 1 \n",
+ "973 https://www.rightmove.co.uk/properties/86296491 2 "
]
},
- "execution_count": 9,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -1004,6 +1090,70 @@
"metadata": {},
"outputs": [],
"source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "edd9fa24-cad2-4448-9b17-c6d514564f41",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sqm_ocr | \n",
+ " price | \n",
+ " price_per_sqm | \n",
+ " url | \n",
+ " bedrooms | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Empty DataFrame\n",
+ "Columns: [sqm_ocr, price, price_per_sqm, url, bedrooms]\n",
+ "Index: []"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df[df.bedrooms > 2]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "002b2a3a-3ecc-45c1-8c2f-c143380ee0d5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n"
+ ]
}
],
"metadata": {