diff --git a/crawler/1_dump_listings.py b/crawler/1_dump_listings.py index 80e79b2..24f5cb4 100644 --- a/crawler/1_dump_listings.py +++ b/crawler/1_dump_listings.py @@ -1,22 +1,21 @@ from rec.query import listing_query import pathlib import json +from data_access import Listing -d = listing_query(1, 1, 2, 15, 0, 800000) +d = listing_query(1, 3, 3, 15, 0, 800000) folder = pathlib.Path("data/rs/") for i in range(1, 10000): try: print(f"page {i}") - d = listing_query(i, 1, 2, 15, 0, 800000) + d = listing_query(i, 3, 3, 15, 0, 800000) except: break for property in d['properties']: identifier = property['identifier'] - listing_folder = folder / str(identifier) - listing_folder.mkdir(exist_ok=True, parents=True) - listing_path = listing_folder / f"listing.json" - with open(listing_path, 'w') as f: + + listing = Listing(identifier) + with open(listing.path_listing_json(), 'w') as f: json.dump(property, f) - diff --git a/crawler/2_dump_detail.py b/crawler/2_dump_detail.py index 22b61af..1bd9247 100644 --- a/crawler/2_dump_detail.py +++ b/crawler/2_dump_detail.py @@ -1,25 +1,17 @@ -import pathlib import json from rec.query import detail_query +from tqdm import tqdm -folder = pathlib.Path('data/rs/') -listings = folder.glob('*/listing.json') +from data_access import Listing -for listing_path in listings: - with open(listing_path) as f: - listing = json.load(f) - identifier = listing['identifier'] +for listing in tqdm(Listing.get_all_listings()): + if listing.path_detail_json().exists(): + continue + try: - d = detail_query(identifier) + d = detail_query(listing.identifier) + with open(listing.path_detail_json(), 'w') as f: + json.dump(d, f) except: - print('Failed at: ', identifier) + print('Failed at: ', listing.identifier) raise - print(identifier) - - detail_path = pathlib.Path(f'data/rs/{identifier}/detail.json') - with open(detail_path, 'w') as f: - json.dump(d, f) - - - - diff --git a/crawler/3_dump_images.py b/crawler/3_dump_images.py index 7a8a2c6..51defa5 100644 --- a/crawler/3_dump_images.py +++ b/crawler/3_dump_images.py @@ -2,41 +2,31 @@ import pathlib import json from urllib.request import urlretrieve from tqdm import tqdm +from data_access import Listing -folder = pathlib.Path('data/rs/') -details = folder.glob('*/detail.json') - -for detail_path in tqdm(list(details)): - - with open(detail_path) as f: +for listing in tqdm(Listing.get_all_listings()): + with open(listing.path_detail_json()) as f: detail = json.load(f) - identifier = detail['property']['identifier'] - rsfolder = folder / str(identifier) - for photo in detail['property']['photos']: url = photo['maxSizeUrl'] picname = url.split('/')[-1] order = photo['order'] - filename = f'{order}_{picname}' - fullpicpath = rsfolder / 'pics' / filename - if fullpicpath.exists(): + p = listing.path_pic_file(order, picname) + if p.exists(): continue - fullpicpath.parent.mkdir(parents=True, exist_ok=True) # create the 'pics' folder - tqdm.write(str(fullpicpath)) - urlretrieve(url, fullpicpath) + tqdm.write(str(p)) + urlretrieve(url, p) for photo in detail['property']['floorplans']: url = photo['url'] picname = url.split('/')[-1] order = photo['order'] - filename = f'{order}_{picname}' - fullpicpath = rsfolder / 'floorplans' / filename - if fullpicpath.exists(): + p = listing.path_floorplan_file(order, picname) + if p.exists(): continue - fullpicpath.parent.mkdir(parents=True, exist_ok=True) # create the 'floorplans' folder - tqdm.write(str(fullpicpath)) - urlretrieve(url, fullpicpath) + tqdm.write(str(p)) + urlretrieve(url, p) \ No newline at end of file diff --git a/crawler/data_access.py b/crawler/data_access.py index 0804ddb..d3faaaa 100644 --- a/crawler/data_access.py +++ b/crawler/data_access.py @@ -10,6 +10,7 @@ _DATA_DIR = pathlib.Path('data/rs/') class Listing(): identifier: int _cached: Dict = None + @staticmethod def get_all_listings() -> List['Listing']: @@ -23,7 +24,9 @@ class Listing(): return identifiers def path_listing(self) -> pathlib.Path: - return _DATA_DIR / str(self.identifier) + p = _DATA_DIR / str(self.identifier) + p.mkdir(parents=True, exist_ok=True) + return p def path_listing_json(self) -> pathlib.Path: return self.path_listing() / 'listing.json' @@ -40,15 +43,16 @@ class Listing(): def path_pic_folder(self) -> pathlib.Path: return self.path_listing() / 'pics' - def path_pic_folder(self, order, name) -> pathlib.Path: + def path_pic_file(self, order, name) -> pathlib.Path: + self.path_pic_folder().mkdir(parents=True, exist_ok=True) return self.path_pic_folder() / f'{order}_{name}' - def path_floorplan_folder(self) -> pathlib.Path: return self.path_listing() / 'floorplans' def path_floorplan_file(self, order, name) -> pathlib.Path: - return self.path_pic_folder() / f'{order}_{name}' + self.path_floorplan_folder().mkdir(parents=True, exist_ok=True) + return self.path_floorplan_folder() / f'{order}_{name}' def list_floorplans(self): images = list(self.path_floorplan_folder().glob('*')) @@ -80,7 +84,10 @@ class Listing(): max_sqm = max([o['estimated_sqm'] for o in objs if o is None]) # filter out Nones return max_sqm - def calculate_sqm_ocr(self): + def calculate_sqm_ocr(self, recalculate=True): + if not recalculate and self.path_floorplan_ocr_json().exists(): + return + objs = [] for floorplan_path in self.list_floorplans(): estimated_sqm, model_output = floorplan.calculate_ocr(floorplan_path) @@ -129,12 +136,17 @@ class Listing(): return None return self.price / self.sqm_ocr + @property + def bedrooms(self) -> int: + return self.detailobject['property']['bedrooms'] + def dict_nicely(self): return { 'sqm_ocr': self.sqm_ocr, 'price': self.price, 'price_per_sqm': self.price_per_sqm, 'url': self.url, + 'bedrooms': self.bedrooms, } diff --git a/crawler/exploration.ipynb b/crawler/exploration.ipynb index c467c58..65bd0bb 100644 --- a/crawler/exploration.ipynb +++ b/crawler/exploration.ipynb @@ -71,6 +71,7 @@ " price\n", " price_per_sqm\n", " url\n", + " bedrooms\n", " \n", " \n", " \n", @@ -80,6 +81,7 @@ " 350000.0\n", " 44814.340589\n", " https://www.rightmove.co.uk/properties/102360773\n", + " 1\n", " \n", " \n", " 1\n", @@ -87,6 +89,7 @@ " 695000.0\n", " 118197.278912\n", " https://www.rightmove.co.uk/properties/115530848\n", + " 1\n", " \n", " \n", " 2\n", @@ -94,6 +97,7 @@ " 575000.0\n", " 8881.680568\n", " https://www.rightmove.co.uk/properties/117095606\n", + " 2\n", " \n", " \n", " 3\n", @@ -101,6 +105,7 @@ " 790000.0\n", " 14962.121212\n", " https://www.rightmove.co.uk/properties/118752221\n", + " 1\n", " \n", " \n", " 4\n", @@ -108,6 +113,7 @@ " 425000.0\n", " 93201.754386\n", " https://www.rightmove.co.uk/properties/119578451\n", + " 2\n", " \n", " \n", " ...\n", @@ -115,45 +121,51 @@ " ...\n", " ...\n", " ...\n", + " ...\n", " \n", " \n", - " 1020\n", + " 1021\n", " NaN\n", " 220000.0\n", " NaN\n", " https://www.rightmove.co.uk/properties/86809926\n", + " 1\n", " \n", " \n", - " 1021\n", + " 1022\n", " 49.00\n", " 450000.0\n", " 9183.673469\n", " https://www.rightmove.co.uk/properties/86811141\n", + " 1\n", " \n", " \n", - " 1022\n", + " 1023\n", " 58.20\n", " 550000.0\n", " 9450.171821\n", " https://www.rightmove.co.uk/properties/86811177\n", + " 2\n", " \n", " \n", - " 1023\n", + " 1024\n", " 3.00\n", " 475000.0\n", " 158333.333333\n", " https://www.rightmove.co.uk/properties/86812494\n", + " 1\n", " \n", " \n", - " 1024\n", + " 1025\n", " 81.60\n", " 790000.0\n", " 9681.372549\n", " https://www.rightmove.co.uk/properties/86972726\n", + " 2\n", " \n", " \n", "\n", - "

1025 rows × 4 columns

\n", + "

1026 rows × 5 columns

\n", "" ], "text/plain": [ @@ -164,26 +176,26 @@ "3 52.80 790000.0 14962.121212 \n", "4 4.56 425000.0 93201.754386 \n", "... ... ... ... \n", - "1020 NaN 220000.0 NaN \n", - "1021 49.00 450000.0 9183.673469 \n", - "1022 58.20 550000.0 9450.171821 \n", - "1023 3.00 475000.0 158333.333333 \n", - "1024 81.60 790000.0 9681.372549 \n", + "1021 NaN 220000.0 NaN \n", + "1022 49.00 450000.0 9183.673469 \n", + "1023 58.20 550000.0 9450.171821 \n", + "1024 3.00 475000.0 158333.333333 \n", + "1025 81.60 790000.0 9681.372549 \n", "\n", - " url \n", - "0 https://www.rightmove.co.uk/properties/102360773 \n", - "1 https://www.rightmove.co.uk/properties/115530848 \n", - "2 https://www.rightmove.co.uk/properties/117095606 \n", - "3 https://www.rightmove.co.uk/properties/118752221 \n", - "4 https://www.rightmove.co.uk/properties/119578451 \n", - "... ... \n", - "1020 https://www.rightmove.co.uk/properties/86809926 \n", - "1021 https://www.rightmove.co.uk/properties/86811141 \n", - "1022 https://www.rightmove.co.uk/properties/86811177 \n", - "1023 https://www.rightmove.co.uk/properties/86812494 \n", - "1024 https://www.rightmove.co.uk/properties/86972726 \n", + " url bedrooms \n", + "0 https://www.rightmove.co.uk/properties/102360773 1 \n", + "1 https://www.rightmove.co.uk/properties/115530848 1 \n", + "2 https://www.rightmove.co.uk/properties/117095606 2 \n", + "3 https://www.rightmove.co.uk/properties/118752221 1 \n", + "4 https://www.rightmove.co.uk/properties/119578451 2 \n", + "... ... ... \n", + "1021 https://www.rightmove.co.uk/properties/86809926 1 \n", + "1022 https://www.rightmove.co.uk/properties/86811141 1 \n", + "1023 https://www.rightmove.co.uk/properties/86811177 2 \n", + "1024 https://www.rightmove.co.uk/properties/86812494 1 \n", + "1025 https://www.rightmove.co.uk/properties/86972726 2 \n", "\n", - "[1025 rows x 4 columns]" + "[1026 rows x 5 columns]" ] }, "execution_count": 4, @@ -198,7 +210,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "id": "99c5b304-3d13-466b-a9f5-83a5db6311b5", "metadata": {}, "outputs": [ @@ -227,6 +239,7 @@ " price\n", " price_per_sqm\n", " url\n", + " bedrooms\n", " \n", " \n", " \n", @@ -236,6 +249,7 @@ " 550000.0\n", " 7.418157\n", " https://www.rightmove.co.uk/properties/145546538\n", + " 2\n", " \n", " \n", " 249\n", @@ -243,6 +257,7 @@ " 725000.0\n", " 33.180626\n", " https://www.rightmove.co.uk/properties/140821736\n", + " 2\n", " \n", " \n", " 177\n", @@ -250,6 +265,7 @@ " 695000.0\n", " 5054.545455\n", " https://www.rightmove.co.uk/properties/139354259\n", + " 2\n", " \n", " \n", " 352\n", @@ -257,6 +273,7 @@ " 475000.0\n", " 5555.555556\n", " https://www.rightmove.co.uk/properties/142142348\n", + " 2\n", " \n", " \n", " 36\n", @@ -264,6 +281,7 @@ " 475000.0\n", " 5729.794934\n", " https://www.rightmove.co.uk/properties/128925950\n", + " 2\n", " \n", " \n", " 700\n", @@ -271,6 +289,7 @@ " 650000.0\n", " 6435.643564\n", " https://www.rightmove.co.uk/properties/144591572\n", + " 1\n", " \n", " \n", " 823\n", @@ -278,6 +297,7 @@ " 525000.0\n", " 6529.850746\n", " https://www.rightmove.co.uk/properties/145051769\n", + " 2\n", " \n", " \n", " 214\n", @@ -285,6 +305,7 @@ " 650000.0\n", " 7103.048847\n", " https://www.rightmove.co.uk/properties/140326307\n", + " 2\n", " \n", " \n", " 171\n", @@ -292,6 +313,7 @@ " 650000.0\n", " 7135.016465\n", " https://www.rightmove.co.uk/properties/139245428\n", + " 2\n", " \n", " \n", " 598\n", @@ -299,6 +321,7 @@ " 795000.0\n", " 7266.910420\n", " https://www.rightmove.co.uk/properties/144034655\n", + " 2\n", " \n", " \n", " 146\n", @@ -306,6 +329,7 @@ " 700000.0\n", " 7464.278098\n", " https://www.rightmove.co.uk/properties/138510395\n", + " 2\n", " \n", " \n", " 711\n", @@ -313,6 +337,7 @@ " 750000.0\n", " 7476.821852\n", " https://www.rightmove.co.uk/properties/144620303\n", + " 2\n", " \n", " \n", " 592\n", @@ -320,6 +345,7 @@ " 650000.0\n", " 7515.319690\n", " https://www.rightmove.co.uk/properties/143987669\n", + " 2\n", " \n", " \n", " 624\n", @@ -327,6 +353,7 @@ " 775000.0\n", " 7646.768624\n", " https://www.rightmove.co.uk/properties/144217922\n", + " 1\n", " \n", " \n", " 55\n", @@ -334,6 +361,7 @@ " 750000.0\n", " 7684.426230\n", " https://www.rightmove.co.uk/properties/132564737\n", + " 2\n", " \n", " \n", " 851\n", @@ -341,6 +369,7 @@ " 625000.0\n", " 7715.096902\n", " https://www.rightmove.co.uk/properties/145172504\n", + " 2\n", " \n", " \n", " 356\n", @@ -348,6 +377,7 @@ " 695000.0\n", " 7808.988764\n", " https://www.rightmove.co.uk/properties/142185623\n", + " 2\n", " \n", " \n", " 748\n", @@ -355,6 +385,7 @@ " 675000.0\n", " 7857.974389\n", " https://www.rightmove.co.uk/properties/144771281\n", + " 2\n", " \n", " \n", " 204\n", @@ -362,6 +393,7 @@ " 650000.0\n", " 7888.349515\n", " https://www.rightmove.co.uk/properties/140173319\n", + " 2\n", " \n", " \n", " 654\n", @@ -369,6 +401,7 @@ " 665000.0\n", " 7980.319213\n", " https://www.rightmove.co.uk/properties/144361100\n", + " 2\n", " \n", " \n", " 514\n", @@ -376,6 +409,7 @@ " 700000.0\n", " 8000.000000\n", " https://www.rightmove.co.uk/properties/143460365\n", + " 1\n", " \n", " \n", " 762\n", @@ -383,6 +417,7 @@ " 750000.0\n", " 8073.196986\n", " https://www.rightmove.co.uk/properties/144846725\n", + " 1\n", " \n", " \n", " 963\n", @@ -390,6 +425,7 @@ " 750000.0\n", " 8073.196986\n", " https://www.rightmove.co.uk/properties/145565252\n", + " 2\n", " \n", " \n", " 938\n", @@ -397,6 +433,7 @@ " 699000.0\n", " 8118.466899\n", " https://www.rightmove.co.uk/properties/145491137\n", + " 2\n", " \n", " \n", " 403\n", @@ -404,6 +441,7 @@ " 750000.0\n", " 8278.145695\n", " https://www.rightmove.co.uk/properties/142704416\n", + " 2\n", " \n", " \n", " 302\n", @@ -411,6 +449,7 @@ " 795000.0\n", " 8520.900322\n", " https://www.rightmove.co.uk/properties/141595433\n", + " 2\n", " \n", " \n", " 671\n", @@ -418,6 +457,7 @@ " 800000.0\n", " 8577.248847\n", " https://www.rightmove.co.uk/properties/144429140\n", + " 2\n", " \n", " \n", " 349\n", @@ -425,6 +465,7 @@ " 695000.0\n", " 8626.039469\n", " https://www.rightmove.co.uk/properties/142115918\n", + " 2\n", " \n", " \n", " 224\n", @@ -432,6 +473,7 @@ " 800000.0\n", " 8879.023307\n", " https://www.rightmove.co.uk/properties/140464481\n", + " 2\n", " \n", " \n", " 740\n", @@ -439,6 +481,7 @@ " 735000.0\n", " 8996.328029\n", " https://www.rightmove.co.uk/properties/144722414\n", + " 2\n", " \n", " \n", " 326\n", @@ -446,6 +489,7 @@ " 800000.0\n", " 9163.802978\n", " https://www.rightmove.co.uk/properties/141846023\n", + " 2\n", " \n", " \n", " 319\n", @@ -453,6 +497,7 @@ " 750000.0\n", " 9194.556822\n", " https://www.rightmove.co.uk/properties/141797357\n", + " 2\n", " \n", " \n", " 558\n", @@ -460,6 +505,7 @@ " 800000.0\n", " 9219.776420\n", " https://www.rightmove.co.uk/properties/143758763\n", + " 2\n", " \n", " \n", " 712\n", @@ -467,6 +513,7 @@ " 800000.0\n", " 9269.988413\n", " https://www.rightmove.co.uk/properties/144622157\n", + " 2\n", " \n", " \n", " 473\n", @@ -474,6 +521,7 @@ " 800000.0\n", " 9302.325581\n", " https://www.rightmove.co.uk/properties/143210102\n", + " 2\n", " \n", " \n", " 105\n", @@ -481,13 +529,15 @@ " 750000.0\n", " 9328.358209\n", " https://www.rightmove.co.uk/properties/136988726\n", + " 2\n", " \n", " \n", - " 1002\n", + " 1003\n", " 80.27\n", " 750000.0\n", " 9343.465803\n", " https://www.rightmove.co.uk/properties/86775291\n", + " 2\n", " \n", " \n", " 235\n", @@ -495,6 +545,7 @@ " 775000.0\n", " 9359.903382\n", " https://www.rightmove.co.uk/properties/140611055\n", + " 2\n", " \n", " \n", " 65\n", @@ -502,6 +553,7 @@ " 800000.0\n", " 9400.705053\n", " https://www.rightmove.co.uk/properties/134116232\n", + " 2\n", " \n", " \n", " 30\n", @@ -509,13 +561,15 @@ " 795000.0\n", " 9498.207885\n", " https://www.rightmove.co.uk/properties/127787960\n", + " 1\n", " \n", " \n", - " 1024\n", + " 1025\n", " 81.60\n", " 790000.0\n", " 9681.372549\n", " https://www.rightmove.co.uk/properties/86972726\n", + " 2\n", " \n", " \n", " 88\n", @@ -523,6 +577,7 @@ " 800000.0\n", " 9785.932722\n", " https://www.rightmove.co.uk/properties/136012193\n", + " 1\n", " \n", " \n", " 454\n", @@ -530,6 +585,7 @@ " 800000.0\n", " 9900.990099\n", " https://www.rightmove.co.uk/properties/143138867\n", + " 2\n", " \n", " \n", " 343\n", @@ -537,6 +593,7 @@ " 800000.0\n", " 9920.634921\n", " https://www.rightmove.co.uk/properties/142032935\n", + " 2\n", " \n", " \n", "\n", @@ -580,63 +637,63 @@ "712 86.30 800000.0 9269.988413 \n", "473 86.00 800000.0 9302.325581 \n", "105 80.40 750000.0 9328.358209 \n", - "1002 80.27 750000.0 9343.465803 \n", + "1003 80.27 750000.0 9343.465803 \n", "235 82.80 775000.0 9359.903382 \n", "65 85.10 800000.0 9400.705053 \n", "30 83.70 795000.0 9498.207885 \n", - "1024 81.60 790000.0 9681.372549 \n", + "1025 81.60 790000.0 9681.372549 \n", "88 81.75 800000.0 9785.932722 \n", "454 80.80 800000.0 9900.990099 \n", "343 80.64 800000.0 9920.634921 \n", "\n", - " url \n", - "953 https://www.rightmove.co.uk/properties/145546538 \n", - "249 https://www.rightmove.co.uk/properties/140821736 \n", - "177 https://www.rightmove.co.uk/properties/139354259 \n", - "352 https://www.rightmove.co.uk/properties/142142348 \n", - "36 https://www.rightmove.co.uk/properties/128925950 \n", - "700 https://www.rightmove.co.uk/properties/144591572 \n", - "823 https://www.rightmove.co.uk/properties/145051769 \n", - "214 https://www.rightmove.co.uk/properties/140326307 \n", - "171 https://www.rightmove.co.uk/properties/139245428 \n", - "598 https://www.rightmove.co.uk/properties/144034655 \n", - "146 https://www.rightmove.co.uk/properties/138510395 \n", - "711 https://www.rightmove.co.uk/properties/144620303 \n", - "592 https://www.rightmove.co.uk/properties/143987669 \n", - "624 https://www.rightmove.co.uk/properties/144217922 \n", - "55 https://www.rightmove.co.uk/properties/132564737 \n", - "851 https://www.rightmove.co.uk/properties/145172504 \n", - "356 https://www.rightmove.co.uk/properties/142185623 \n", - "748 https://www.rightmove.co.uk/properties/144771281 \n", - "204 https://www.rightmove.co.uk/properties/140173319 \n", - "654 https://www.rightmove.co.uk/properties/144361100 \n", - "514 https://www.rightmove.co.uk/properties/143460365 \n", - "762 https://www.rightmove.co.uk/properties/144846725 \n", - "963 https://www.rightmove.co.uk/properties/145565252 \n", - "938 https://www.rightmove.co.uk/properties/145491137 \n", - "403 https://www.rightmove.co.uk/properties/142704416 \n", - "302 https://www.rightmove.co.uk/properties/141595433 \n", - "671 https://www.rightmove.co.uk/properties/144429140 \n", - "349 https://www.rightmove.co.uk/properties/142115918 \n", - "224 https://www.rightmove.co.uk/properties/140464481 \n", - "740 https://www.rightmove.co.uk/properties/144722414 \n", - "326 https://www.rightmove.co.uk/properties/141846023 \n", - "319 https://www.rightmove.co.uk/properties/141797357 \n", - "558 https://www.rightmove.co.uk/properties/143758763 \n", - "712 https://www.rightmove.co.uk/properties/144622157 \n", - "473 https://www.rightmove.co.uk/properties/143210102 \n", - "105 https://www.rightmove.co.uk/properties/136988726 \n", - "1002 https://www.rightmove.co.uk/properties/86775291 \n", - "235 https://www.rightmove.co.uk/properties/140611055 \n", - "65 https://www.rightmove.co.uk/properties/134116232 \n", - "30 https://www.rightmove.co.uk/properties/127787960 \n", - "1024 https://www.rightmove.co.uk/properties/86972726 \n", - "88 https://www.rightmove.co.uk/properties/136012193 \n", - "454 https://www.rightmove.co.uk/properties/143138867 \n", - "343 https://www.rightmove.co.uk/properties/142032935 " + " url bedrooms \n", + "953 https://www.rightmove.co.uk/properties/145546538 2 \n", + "249 https://www.rightmove.co.uk/properties/140821736 2 \n", + "177 https://www.rightmove.co.uk/properties/139354259 2 \n", + "352 https://www.rightmove.co.uk/properties/142142348 2 \n", + "36 https://www.rightmove.co.uk/properties/128925950 2 \n", + "700 https://www.rightmove.co.uk/properties/144591572 1 \n", + "823 https://www.rightmove.co.uk/properties/145051769 2 \n", + "214 https://www.rightmove.co.uk/properties/140326307 2 \n", + "171 https://www.rightmove.co.uk/properties/139245428 2 \n", + "598 https://www.rightmove.co.uk/properties/144034655 2 \n", + "146 https://www.rightmove.co.uk/properties/138510395 2 \n", + "711 https://www.rightmove.co.uk/properties/144620303 2 \n", + "592 https://www.rightmove.co.uk/properties/143987669 2 \n", + "624 https://www.rightmove.co.uk/properties/144217922 1 \n", + "55 https://www.rightmove.co.uk/properties/132564737 2 \n", + "851 https://www.rightmove.co.uk/properties/145172504 2 \n", + "356 https://www.rightmove.co.uk/properties/142185623 2 \n", + "748 https://www.rightmove.co.uk/properties/144771281 2 \n", + "204 https://www.rightmove.co.uk/properties/140173319 2 \n", + "654 https://www.rightmove.co.uk/properties/144361100 2 \n", + "514 https://www.rightmove.co.uk/properties/143460365 1 \n", + "762 https://www.rightmove.co.uk/properties/144846725 1 \n", + "963 https://www.rightmove.co.uk/properties/145565252 2 \n", + "938 https://www.rightmove.co.uk/properties/145491137 2 \n", + "403 https://www.rightmove.co.uk/properties/142704416 2 \n", + "302 https://www.rightmove.co.uk/properties/141595433 2 \n", + "671 https://www.rightmove.co.uk/properties/144429140 2 \n", + "349 https://www.rightmove.co.uk/properties/142115918 2 \n", + "224 https://www.rightmove.co.uk/properties/140464481 2 \n", + "740 https://www.rightmove.co.uk/properties/144722414 2 \n", + "326 https://www.rightmove.co.uk/properties/141846023 2 \n", + "319 https://www.rightmove.co.uk/properties/141797357 2 \n", + "558 https://www.rightmove.co.uk/properties/143758763 2 \n", + "712 https://www.rightmove.co.uk/properties/144622157 2 \n", + "473 https://www.rightmove.co.uk/properties/143210102 2 \n", + "105 https://www.rightmove.co.uk/properties/136988726 2 \n", + "1003 https://www.rightmove.co.uk/properties/86775291 2 \n", + "235 https://www.rightmove.co.uk/properties/140611055 2 \n", + "65 https://www.rightmove.co.uk/properties/134116232 2 \n", + "30 https://www.rightmove.co.uk/properties/127787960 1 \n", + "1025 https://www.rightmove.co.uk/properties/86972726 2 \n", + "88 https://www.rightmove.co.uk/properties/136012193 1 \n", + "454 https://www.rightmove.co.uk/properties/143138867 2 \n", + "343 https://www.rightmove.co.uk/properties/142032935 2 " ] }, - "execution_count": 7, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -648,7 +705,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "id": "52545cfa-0932-46fe-ba7e-961fd43f2786", "metadata": {}, "outputs": [ @@ -677,6 +734,7 @@ " price\n", " price_per_sqm\n", " url\n", + " bedrooms\n", " \n", " \n", " \n", @@ -686,6 +744,7 @@ " 400000.0\n", " 5449.591281\n", " https://www.rightmove.co.uk/properties/142186991\n", + " 2\n", " \n", " \n", " 293\n", @@ -693,6 +752,7 @@ " 425000.0\n", " 5519.480519\n", " https://www.rightmove.co.uk/properties/141437783\n", + " 2\n", " \n", " \n", " 352\n", @@ -700,6 +760,7 @@ " 475000.0\n", " 5555.555556\n", " https://www.rightmove.co.uk/properties/142142348\n", + " 2\n", " \n", " \n", " 685\n", @@ -707,6 +768,7 @@ " 425000.0\n", " 5589.898724\n", " https://www.rightmove.co.uk/properties/144494012\n", + " 2\n", " \n", " \n", " 36\n", @@ -714,6 +776,7 @@ " 475000.0\n", " 5729.794934\n", " https://www.rightmove.co.uk/properties/128925950\n", + " 2\n", " \n", " \n", " 491\n", @@ -721,13 +784,15 @@ " 450000.0\n", " 6277.901786\n", " https://www.rightmove.co.uk/properties/143315840\n", + " 2\n", " \n", " \n", - " 1019\n", + " 1020\n", " 73.67\n", " 495000.0\n", " 6719.152980\n", " https://www.rightmove.co.uk/properties/86807916\n", + " 2\n", " \n", " \n", "\n", @@ -741,19 +806,19 @@ "685 76.03 425000.0 5589.898724 \n", "36 82.90 475000.0 5729.794934 \n", "491 71.68 450000.0 6277.901786 \n", - "1019 73.67 495000.0 6719.152980 \n", + "1020 73.67 495000.0 6719.152980 \n", "\n", - " url \n", - "359 https://www.rightmove.co.uk/properties/142186991 \n", - "293 https://www.rightmove.co.uk/properties/141437783 \n", - "352 https://www.rightmove.co.uk/properties/142142348 \n", - "685 https://www.rightmove.co.uk/properties/144494012 \n", - "36 https://www.rightmove.co.uk/properties/128925950 \n", - "491 https://www.rightmove.co.uk/properties/143315840 \n", - "1019 https://www.rightmove.co.uk/properties/86807916 " + " url bedrooms \n", + "359 https://www.rightmove.co.uk/properties/142186991 2 \n", + "293 https://www.rightmove.co.uk/properties/141437783 2 \n", + "352 https://www.rightmove.co.uk/properties/142142348 2 \n", + "685 https://www.rightmove.co.uk/properties/144494012 2 \n", + "36 https://www.rightmove.co.uk/properties/128925950 2 \n", + "491 https://www.rightmove.co.uk/properties/143315840 2 \n", + "1020 https://www.rightmove.co.uk/properties/86807916 2 " ] }, - "execution_count": 8, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -765,7 +830,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "id": "d0246926-13ef-4110-8e3a-fb676a55c2a6", "metadata": {}, "outputs": [ @@ -794,6 +859,7 @@ " price\n", " price_per_sqm\n", " url\n", + " bedrooms\n", " \n", " \n", " \n", @@ -803,6 +869,7 @@ " 550000.0\n", " 7.418157\n", " https://www.rightmove.co.uk/properties/145546538\n", + " 2\n", " \n", " \n", " 823\n", @@ -810,6 +877,7 @@ " 525000.0\n", " 6529.850746\n", " https://www.rightmove.co.uk/properties/145051769\n", + " 2\n", " \n", " \n", " 492\n", @@ -817,6 +885,7 @@ " 525000.0\n", " 7104.194858\n", " https://www.rightmove.co.uk/properties/143317361\n", + " 2\n", " \n", " \n", " 561\n", @@ -824,6 +893,7 @@ " 550000.0\n", " 7170.795306\n", " https://www.rightmove.co.uk/properties/143780789\n", + " 2\n", " \n", " \n", " 69\n", @@ -831,13 +901,15 @@ " 525000.0\n", " 7181.942544\n", " https://www.rightmove.co.uk/properties/134574563\n", + " 2\n", " \n", " \n", - " 987\n", + " 988\n", " 72.40\n", " 525000.0\n", " 7251.381215\n", " https://www.rightmove.co.uk/properties/86648925\n", + " 2\n", " \n", " \n", " 272\n", @@ -845,6 +917,7 @@ " 575000.0\n", " 7382.205675\n", " https://www.rightmove.co.uk/properties/141131297\n", + " 2\n", " \n", " \n", " 521\n", @@ -852,6 +925,7 @@ " 525000.0\n", " 7406.884876\n", " https://www.rightmove.co.uk/properties/143514149\n", + " 2\n", " \n", " \n", " 324\n", @@ -859,13 +933,15 @@ " 575000.0\n", " 7555.847569\n", " https://www.rightmove.co.uk/properties/141831353\n", + " 2\n", " \n", " \n", - " 1017\n", + " 1018\n", " 71.80\n", " 550000.0\n", " 7660.167131\n", " https://www.rightmove.co.uk/properties/86804832\n", + " 2\n", " \n", " \n", " 526\n", @@ -873,6 +949,7 @@ " 600000.0\n", " 7692.307692\n", " https://www.rightmove.co.uk/properties/143552156\n", + " 1\n", " \n", " \n", " 817\n", @@ -880,6 +957,7 @@ " 550000.0\n", " 7727.975270\n", " https://www.rightmove.co.uk/properties/145035929\n", + " 2\n", " \n", " \n", " 843\n", @@ -887,6 +965,7 @@ " 600000.0\n", " 7741.935484\n", " https://www.rightmove.co.uk/properties/145144988\n", + " 2\n", " \n", " \n", " 337\n", @@ -894,6 +973,7 @@ " 550000.0\n", " 7790.368272\n", " https://www.rightmove.co.uk/properties/141904286\n", + " 2\n", " \n", " \n", " 233\n", @@ -901,6 +981,7 @@ " 600000.0\n", " 7926.023778\n", " https://www.rightmove.co.uk/properties/140582213\n", + " 2\n", " \n", " \n", " 763\n", @@ -908,6 +989,7 @@ " 600000.0\n", " 8000.000000\n", " https://www.rightmove.co.uk/properties/144862070\n", + " 2\n", " \n", " \n", " 315\n", @@ -915,6 +997,7 @@ " 590000.0\n", " 8274.894811\n", " https://www.rightmove.co.uk/properties/141669686\n", + " 2\n", " \n", " \n", " 899\n", @@ -922,6 +1005,7 @@ " 595000.0\n", " 8325.171401\n", " https://www.rightmove.co.uk/properties/145362911\n", + " 2\n", " \n", " \n", " 147\n", @@ -929,13 +1013,15 @@ " 600000.0\n", " 8391.608392\n", " https://www.rightmove.co.uk/properties/138537527\n", + " 1\n", " \n", " \n", - " 972\n", + " 973\n", " 70.89\n", " 595000.0\n", " 8393.285372\n", " https://www.rightmove.co.uk/properties/86296491\n", + " 2\n", " \n", " \n", "\n", @@ -948,11 +1034,11 @@ "492 73.90 525000.0 7104.194858 \n", "561 76.70 550000.0 7170.795306 \n", "69 73.10 525000.0 7181.942544 \n", - "987 72.40 525000.0 7251.381215 \n", + "988 72.40 525000.0 7251.381215 \n", "272 77.89 575000.0 7382.205675 \n", "521 70.88 525000.0 7406.884876 \n", "324 76.10 575000.0 7555.847569 \n", - "1017 71.80 550000.0 7660.167131 \n", + "1018 71.80 550000.0 7660.167131 \n", "526 78.00 600000.0 7692.307692 \n", "817 71.17 550000.0 7727.975270 \n", "843 77.50 600000.0 7741.935484 \n", @@ -962,32 +1048,32 @@ "315 71.30 590000.0 8274.894811 \n", "899 71.47 595000.0 8325.171401 \n", "147 71.50 600000.0 8391.608392 \n", - "972 70.89 595000.0 8393.285372 \n", + "973 70.89 595000.0 8393.285372 \n", "\n", - " url \n", - "953 https://www.rightmove.co.uk/properties/145546538 \n", - "823 https://www.rightmove.co.uk/properties/145051769 \n", - "492 https://www.rightmove.co.uk/properties/143317361 \n", - "561 https://www.rightmove.co.uk/properties/143780789 \n", - "69 https://www.rightmove.co.uk/properties/134574563 \n", - "987 https://www.rightmove.co.uk/properties/86648925 \n", - "272 https://www.rightmove.co.uk/properties/141131297 \n", - "521 https://www.rightmove.co.uk/properties/143514149 \n", - "324 https://www.rightmove.co.uk/properties/141831353 \n", - "1017 https://www.rightmove.co.uk/properties/86804832 \n", - "526 https://www.rightmove.co.uk/properties/143552156 \n", - "817 https://www.rightmove.co.uk/properties/145035929 \n", - "843 https://www.rightmove.co.uk/properties/145144988 \n", - "337 https://www.rightmove.co.uk/properties/141904286 \n", - "233 https://www.rightmove.co.uk/properties/140582213 \n", - "763 https://www.rightmove.co.uk/properties/144862070 \n", - "315 https://www.rightmove.co.uk/properties/141669686 \n", - "899 https://www.rightmove.co.uk/properties/145362911 \n", - "147 https://www.rightmove.co.uk/properties/138537527 \n", - "972 https://www.rightmove.co.uk/properties/86296491 " + " url bedrooms \n", + "953 https://www.rightmove.co.uk/properties/145546538 2 \n", + "823 https://www.rightmove.co.uk/properties/145051769 2 \n", + "492 https://www.rightmove.co.uk/properties/143317361 2 \n", + "561 https://www.rightmove.co.uk/properties/143780789 2 \n", + "69 https://www.rightmove.co.uk/properties/134574563 2 \n", + "988 https://www.rightmove.co.uk/properties/86648925 2 \n", + "272 https://www.rightmove.co.uk/properties/141131297 2 \n", + "521 https://www.rightmove.co.uk/properties/143514149 2 \n", + "324 https://www.rightmove.co.uk/properties/141831353 2 \n", + "1018 https://www.rightmove.co.uk/properties/86804832 2 \n", + "526 https://www.rightmove.co.uk/properties/143552156 1 \n", + "817 https://www.rightmove.co.uk/properties/145035929 2 \n", + "843 https://www.rightmove.co.uk/properties/145144988 2 \n", + "337 https://www.rightmove.co.uk/properties/141904286 2 \n", + "233 https://www.rightmove.co.uk/properties/140582213 2 \n", + "763 https://www.rightmove.co.uk/properties/144862070 2 \n", + "315 https://www.rightmove.co.uk/properties/141669686 2 \n", + "899 https://www.rightmove.co.uk/properties/145362911 2 \n", + "147 https://www.rightmove.co.uk/properties/138537527 1 \n", + "973 https://www.rightmove.co.uk/properties/86296491 2 " ] }, - "execution_count": 9, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -1004,6 +1090,70 @@ "metadata": {}, "outputs": [], "source": [] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "edd9fa24-cad2-4448-9b17-c6d514564f41", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sqm_ocrpriceprice_per_sqmurlbedrooms
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [sqm_ocr, price, price_per_sqm, url, bedrooms]\n", + "Index: []" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.bedrooms > 2]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "002b2a3a-3ecc-45c1-8c2f-c143380ee0d5", + "metadata": {}, + "outputs": [], + "source": [ + "\n" + ] } ], "metadata": {