{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "f20bddee-1e7c-4c46-a17a-c7bb6c13f30c", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/kadir/code/realestate/crawler/venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } ], "source": [ "from data_access import Listing\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "id": "b1101088-9613-465f-81fd-79801e0202b8", "metadata": {}, "outputs": [], "source": [ "ls = Listing.get_all_listings()\n", "\n", "decisions = {\n", " 142789514: 'n',\n", " 136010102: 'n',\n", " 141457334: 'y',\n", " 86778015: 'n',\n", " 134574563: 'n',\n", " 86648925: 'n',\n", " 143319068: 'n',\n", " 135668207: 'n',\n", " 142063949: 'n',\n", " 145051769: 'n',\n", " 138945719: 'n',\n", " 135714833: 'n',\n", " 144983192: 'n',\n", " 144666920: 'n',\n", " 143895080: 'n',\n", " 141114200: 'n',\n", " 145407389: 'n',\n", " 145047533: 'n',\n", " 145161722: 'n',\n", " 145130066: 'n',\n", " 142110470: 'n',\n", " 133667606: 'n',\n", " 145005536: 'n',\n", " 143458961: 'n',\n", " 141412010: 'y',\n", " 138683339: 'n',\n", " 138490370: 'n',\n", " 137805509: 'n',\n", " 135854261: 'n',\n", "}" ] }, { "cell_type": "code", "execution_count": 3, "id": "63e61601-7e3f-4d58-89f6-1794e4868cc3", "metadata": {}, "outputs": [], "source": [ "ds = [l.dict_nicely() for l in ls]" ] }, { "cell_type": "code", "execution_count": 4, "id": "1c222721-f426-42c0-9ac5-badc1f7a2034", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
identifiersqm_ocrpriceprice_per_sqmurlbedroomstravel_time_fastesttravel_time_secondlease_leftdevelopmentdecision
0102360773NaN350000.0NaNhttps://www.rightmove.co.uk/properties/1023607731{'duration': 2695, 'distance': 6467, 'duration...{'duration': 1682, 'distance': 6810, 'duration...119.0FalseNone
1105836849NaN400000.0NaNhttps://www.rightmove.co.uk/properties/1058368493{'duration': 2565, 'distance': 14070, 'duratio...{'duration': 2565, 'distance': 14070, 'duratio...NaNFalseNone
210723321476.91400000.05200.884150https://www.rightmove.co.uk/properties/1072332143{'duration': 1714, 'distance': 9570, 'duration...{'duration': 1774, 'distance': 9570, 'duration...91.0FalseNone
3107976896112.40800000.07117.437722https://www.rightmove.co.uk/properties/1079768963{'duration': 1862, 'distance': 8278, 'duration...{'duration': 1862, 'distance': 8278, 'duration...NaNFalseNone
4115499441115.60775000.06704.152249https://www.rightmove.co.uk/properties/1154994413{'duration': 2943, 'distance': 7437, 'duration...{'duration': 2167, 'distance': 9920, 'duration...NaNFalseNone
....................................
218186813508NaN750000.0NaNhttps://www.rightmove.co.uk/properties/868135083{'duration': 2400, 'distance': 13983, 'duratio...{'duration': 2605, 'distance': 14702, 'duratio...NaNTrueNone
218286813523NaN655000.0NaNhttps://www.rightmove.co.uk/properties/868135233{'duration': 2400, 'distance': 13983, 'duratio...{'duration': 2605, 'distance': 14702, 'duratio...NaNTrueNone
21838681466982.80550000.06642.512077https://www.rightmove.co.uk/properties/868146693{'duration': 2310, 'distance': 12972, 'duratio...{'duration': 2322, 'distance': 12104, 'duratio...0.0FalseNone
21848695595890.00300000.03333.333333https://www.rightmove.co.uk/properties/869559583{'duration': 2332, 'distance': 6898, 'duration...{'duration': 2248, 'distance': 6893, 'duration...105.0FalseNone
21858697272681.60790000.09681.372549https://www.rightmove.co.uk/properties/869727262{'duration': 1393, 'distance': 6390, 'duration...{'duration': 2733, 'distance': 6490, 'duration...993.0FalseNone
\n", "

2186 rows × 11 columns

\n", "
" ], "text/plain": [ " identifier sqm_ocr price price_per_sqm \\\n", "0 102360773 NaN 350000.0 NaN \n", "1 105836849 NaN 400000.0 NaN \n", "2 107233214 76.91 400000.0 5200.884150 \n", "3 107976896 112.40 800000.0 7117.437722 \n", "4 115499441 115.60 775000.0 6704.152249 \n", "... ... ... ... ... \n", "2181 86813508 NaN 750000.0 NaN \n", "2182 86813523 NaN 655000.0 NaN \n", "2183 86814669 82.80 550000.0 6642.512077 \n", "2184 86955958 90.00 300000.0 3333.333333 \n", "2185 86972726 81.60 790000.0 9681.372549 \n", "\n", " url bedrooms \\\n", "0 https://www.rightmove.co.uk/properties/102360773 1 \n", "1 https://www.rightmove.co.uk/properties/105836849 3 \n", "2 https://www.rightmove.co.uk/properties/107233214 3 \n", "3 https://www.rightmove.co.uk/properties/107976896 3 \n", "4 https://www.rightmove.co.uk/properties/115499441 3 \n", "... ... ... \n", "2181 https://www.rightmove.co.uk/properties/86813508 3 \n", "2182 https://www.rightmove.co.uk/properties/86813523 3 \n", "2183 https://www.rightmove.co.uk/properties/86814669 3 \n", "2184 https://www.rightmove.co.uk/properties/86955958 3 \n", "2185 https://www.rightmove.co.uk/properties/86972726 2 \n", "\n", " travel_time_fastest \\\n", "0 {'duration': 2695, 'distance': 6467, 'duration... \n", "1 {'duration': 2565, 'distance': 14070, 'duratio... \n", "2 {'duration': 1714, 'distance': 9570, 'duration... \n", "3 {'duration': 1862, 'distance': 8278, 'duration... \n", "4 {'duration': 2943, 'distance': 7437, 'duration... \n", "... ... \n", "2181 {'duration': 2400, 'distance': 13983, 'duratio... \n", "2182 {'duration': 2400, 'distance': 13983, 'duratio... \n", "2183 {'duration': 2310, 'distance': 12972, 'duratio... \n", "2184 {'duration': 2332, 'distance': 6898, 'duration... \n", "2185 {'duration': 1393, 'distance': 6390, 'duration... \n", "\n", " travel_time_second lease_left \\\n", "0 {'duration': 1682, 'distance': 6810, 'duration... 119.0 \n", "1 {'duration': 2565, 'distance': 14070, 'duratio... NaN \n", "2 {'duration': 1774, 'distance': 9570, 'duration... 91.0 \n", "3 {'duration': 1862, 'distance': 8278, 'duration... NaN \n", "4 {'duration': 2167, 'distance': 9920, 'duration... NaN \n", "... ... ... \n", "2181 {'duration': 2605, 'distance': 14702, 'duratio... NaN \n", "2182 {'duration': 2605, 'distance': 14702, 'duratio... NaN \n", "2183 {'duration': 2322, 'distance': 12104, 'duratio... 0.0 \n", "2184 {'duration': 2248, 'distance': 6893, 'duration... 105.0 \n", "2185 {'duration': 2733, 'distance': 6490, 'duration... 993.0 \n", "\n", " development decision \n", "0 False None \n", "1 False None \n", "2 False None \n", "3 False None \n", "4 False None \n", "... ... ... \n", "2181 True None \n", "2182 True None \n", "2183 False None \n", "2184 False None \n", "2185 False None \n", "\n", "[2186 rows x 11 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame(ds)\n", "df.loc[:, 'decision'] = df.identifier.apply(lambda x: decisions.get(x))\n", "df" ] }, { "cell_type": "code", "execution_count": 5, "id": "d80d9911-9a6d-4608-a6da-11dc864ee32b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(2186, 11)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape" ] }, { "cell_type": "code", "execution_count": 6, "id": "7b37ad6b-9b0a-444e-b8c3-6fe4e43e42cb", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
identifiersqm_ocrpriceprice_per_sqmurlbedroomslease_leftdevelopmentdecisiona_durationa_initial_walk_durationa_duration_per_transita_number_of_transit_stopsb_durationb_initial_walk_durationb_duration_per_transitb_number_of_transit_stopsmin_duration
0102360773NaN350000.0NaNhttps://www.rightmove.co.uk/properties/1023607731119.0FalseNone2695174{'WALK': 414, 'TRANSIT': 2280}11682.0204.0{'WALK': 608, 'TRANSIT': 804}2.028.0
1105836849NaN400000.0NaNhttps://www.rightmove.co.uk/properties/1058368493NaNFalseNone2565340{'WALK': 750, 'TRANSIT': 1800}12565.0340.0{'WALK': 750, 'TRANSIT': 1800}1.043.0
\n", "
" ], "text/plain": [ " identifier sqm_ocr price price_per_sqm \\\n", "0 102360773 NaN 350000.0 NaN \n", "1 105836849 NaN 400000.0 NaN \n", "\n", " url bedrooms lease_left \\\n", "0 https://www.rightmove.co.uk/properties/102360773 1 119.0 \n", "1 https://www.rightmove.co.uk/properties/105836849 3 NaN \n", "\n", " development decision a_duration a_initial_walk_duration \\\n", "0 False None 2695 174 \n", "1 False None 2565 340 \n", "\n", " a_duration_per_transit a_number_of_transit_stops b_duration \\\n", "0 {'WALK': 414, 'TRANSIT': 2280} 1 1682.0 \n", "1 {'WALK': 750, 'TRANSIT': 1800} 1 2565.0 \n", "\n", " b_initial_walk_duration b_duration_per_transit \\\n", "0 204.0 {'WALK': 608, 'TRANSIT': 804} \n", "1 340.0 {'WALK': 750, 'TRANSIT': 1800} \n", "\n", " b_number_of_transit_stops min_duration \n", "0 2.0 28.0 \n", "1 1.0 43.0 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dropcolumns = ['distance_per_transit', 'duration_static', 'distance']\n", "s1 = df['travel_time_fastest'].apply(pd.Series).drop(dropcolumns, axis=1)\n", "s1.columns = ['a_' + c for c in s1.columns]\n", "\n", "s2 = df['travel_time_second'].apply(pd.Series).drop(dropcolumns, axis=1)\n", "s2.columns = ['b_' + c for c in s2.columns]\n", "\n", "df2 = pd.concat([df.drop(['travel_time_fastest', 'travel_time_second'], axis=1), s1, s2], axis=1)\n", "df2.loc[:, 'min_duration'] = (df2.loc[:, ['a_duration', 'b_duration']].min(axis=1) / 60).round()\n", "df2.head(2)" ] }, { "cell_type": "code", "execution_count": 7, "id": "8c75aaa6-6113-482f-809b-11e405510184", "metadata": {}, "outputs": [], "source": [ "df2.to_clipboard()" ] }, { "cell_type": "code", "execution_count": 8, "id": "79f99692-91e8-4915-9b57-7b3a1efd7d3a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
identifiersqm_ocrpriceprice_per_sqmurlbedroomslease_leftdevelopmentdecisiondurationinitial_walk_durationduration_per_transitnumber_of_transit_stops
0102360773NaN350000.0NaNhttps://www.rightmove.co.uk/properties/1023607731119.0FalseNone45174{'WALK': 414, 'TRANSIT': 2280}1
1105836849NaN400000.0NaNhttps://www.rightmove.co.uk/properties/1058368493NaNFalseNone43340{'WALK': 750, 'TRANSIT': 1800}1
\n", "
" ], "text/plain": [ " identifier sqm_ocr price price_per_sqm \\\n", "0 102360773 NaN 350000.0 NaN \n", "1 105836849 NaN 400000.0 NaN \n", "\n", " url bedrooms lease_left \\\n", "0 https://www.rightmove.co.uk/properties/102360773 1 119.0 \n", "1 https://www.rightmove.co.uk/properties/105836849 3 NaN \n", "\n", " development decision duration initial_walk_duration \\\n", "0 False None 45 174 \n", "1 False None 43 340 \n", "\n", " duration_per_transit number_of_transit_stops \n", "0 {'WALK': 414, 'TRANSIT': 2280} 1 \n", "1 {'WALK': 750, 'TRANSIT': 1800} 1 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dropcolumns = ['distance_per_transit', 'duration_static', 'distance']\n", "s1 = df['travel_time_fastest'].apply(pd.Series).drop(dropcolumns, axis=1)\n", "\n", "df3 = pd.concat([df.drop(['travel_time_fastest', 'travel_time_second'], axis=1), s1], axis=1)\n", "df3.loc[:, 'duration'] = (df3.loc[:, ['duration']].min(axis=1) / 60).round()\n", "df3.to_clipboard()\n", "df3.head(2)" ] }, { "cell_type": "code", "execution_count": null, "id": "abcbde40-7432-4449-957a-79ce2ca126eb", "metadata": {}, "outputs": [], "source": [ "\n" ] }, { "cell_type": "code", "execution_count": 5, "id": "99c5b304-3d13-466b-a9f5-83a5db6311b5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sqm_ocrpriceprice_per_sqmurlbedrooms
95374142.40550000.07.418157https://www.rightmove.co.uk/properties/1455465382
24921850.10725000.033.180626https://www.rightmove.co.uk/properties/1408217362
177137.50695000.05054.545455https://www.rightmove.co.uk/properties/1393542592
35285.50475000.05555.555556https://www.rightmove.co.uk/properties/1421423482
3682.90475000.05729.794934https://www.rightmove.co.uk/properties/1289259502
700101.00650000.06435.643564https://www.rightmove.co.uk/properties/1445915721
82380.40525000.06529.850746https://www.rightmove.co.uk/properties/1450517692
21491.51650000.07103.048847https://www.rightmove.co.uk/properties/1403263072
17191.10650000.07135.016465https://www.rightmove.co.uk/properties/1392454282
598109.40795000.07266.910420https://www.rightmove.co.uk/properties/1440346552
14693.78700000.07464.278098https://www.rightmove.co.uk/properties/1385103952
711100.31750000.07476.821852https://www.rightmove.co.uk/properties/1446203032
59286.49650000.07515.319690https://www.rightmove.co.uk/properties/1439876692
624101.35775000.07646.768624https://www.rightmove.co.uk/properties/1442179221
5597.60750000.07684.426230https://www.rightmove.co.uk/properties/1325647372
85181.01625000.07715.096902https://www.rightmove.co.uk/properties/1451725042
35689.00695000.07808.988764https://www.rightmove.co.uk/properties/1421856232
74885.90675000.07857.974389https://www.rightmove.co.uk/properties/1447712812
20482.40650000.07888.349515https://www.rightmove.co.uk/properties/1401733192
65483.33665000.07980.319213https://www.rightmove.co.uk/properties/1443611002
51487.50700000.08000.000000https://www.rightmove.co.uk/properties/1434603651
76292.90750000.08073.196986https://www.rightmove.co.uk/properties/1448467251
96392.90750000.08073.196986https://www.rightmove.co.uk/properties/1455652522
93886.10699000.08118.466899https://www.rightmove.co.uk/properties/1454911372
40390.60750000.08278.145695https://www.rightmove.co.uk/properties/1427044162
30293.30795000.08520.900322https://www.rightmove.co.uk/properties/1415954332
67193.27800000.08577.248847https://www.rightmove.co.uk/properties/1444291402
34980.57695000.08626.039469https://www.rightmove.co.uk/properties/1421159182
22490.10800000.08879.023307https://www.rightmove.co.uk/properties/1404644812
74081.70735000.08996.328029https://www.rightmove.co.uk/properties/1447224142
32687.30800000.09163.802978https://www.rightmove.co.uk/properties/1418460232
31981.57750000.09194.556822https://www.rightmove.co.uk/properties/1417973572
55886.77800000.09219.776420https://www.rightmove.co.uk/properties/1437587632
71286.30800000.09269.988413https://www.rightmove.co.uk/properties/1446221572
47386.00800000.09302.325581https://www.rightmove.co.uk/properties/1432101022
10580.40750000.09328.358209https://www.rightmove.co.uk/properties/1369887262
100380.27750000.09343.465803https://www.rightmove.co.uk/properties/867752912
23582.80775000.09359.903382https://www.rightmove.co.uk/properties/1406110552
6585.10800000.09400.705053https://www.rightmove.co.uk/properties/1341162322
3083.70795000.09498.207885https://www.rightmove.co.uk/properties/1277879601
102581.60790000.09681.372549https://www.rightmove.co.uk/properties/869727262
8881.75800000.09785.932722https://www.rightmove.co.uk/properties/1360121931
45480.80800000.09900.990099https://www.rightmove.co.uk/properties/1431388672
34380.64800000.09920.634921https://www.rightmove.co.uk/properties/1420329352
\n", "
" ], "text/plain": [ " sqm_ocr price price_per_sqm \\\n", "953 74142.40 550000.0 7.418157 \n", "249 21850.10 725000.0 33.180626 \n", "177 137.50 695000.0 5054.545455 \n", "352 85.50 475000.0 5555.555556 \n", "36 82.90 475000.0 5729.794934 \n", "700 101.00 650000.0 6435.643564 \n", "823 80.40 525000.0 6529.850746 \n", "214 91.51 650000.0 7103.048847 \n", "171 91.10 650000.0 7135.016465 \n", "598 109.40 795000.0 7266.910420 \n", "146 93.78 700000.0 7464.278098 \n", "711 100.31 750000.0 7476.821852 \n", "592 86.49 650000.0 7515.319690 \n", "624 101.35 775000.0 7646.768624 \n", "55 97.60 750000.0 7684.426230 \n", "851 81.01 625000.0 7715.096902 \n", "356 89.00 695000.0 7808.988764 \n", "748 85.90 675000.0 7857.974389 \n", "204 82.40 650000.0 7888.349515 \n", "654 83.33 665000.0 7980.319213 \n", "514 87.50 700000.0 8000.000000 \n", "762 92.90 750000.0 8073.196986 \n", "963 92.90 750000.0 8073.196986 \n", "938 86.10 699000.0 8118.466899 \n", "403 90.60 750000.0 8278.145695 \n", "302 93.30 795000.0 8520.900322 \n", "671 93.27 800000.0 8577.248847 \n", "349 80.57 695000.0 8626.039469 \n", "224 90.10 800000.0 8879.023307 \n", "740 81.70 735000.0 8996.328029 \n", "326 87.30 800000.0 9163.802978 \n", "319 81.57 750000.0 9194.556822 \n", "558 86.77 800000.0 9219.776420 \n", "712 86.30 800000.0 9269.988413 \n", "473 86.00 800000.0 9302.325581 \n", "105 80.40 750000.0 9328.358209 \n", "1003 80.27 750000.0 9343.465803 \n", "235 82.80 775000.0 9359.903382 \n", "65 85.10 800000.0 9400.705053 \n", "30 83.70 795000.0 9498.207885 \n", "1025 81.60 790000.0 9681.372549 \n", "88 81.75 800000.0 9785.932722 \n", "454 80.80 800000.0 9900.990099 \n", "343 80.64 800000.0 9920.634921 \n", "\n", " url bedrooms \n", "953 https://www.rightmove.co.uk/properties/145546538 2 \n", "249 https://www.rightmove.co.uk/properties/140821736 2 \n", "177 https://www.rightmove.co.uk/properties/139354259 2 \n", "352 https://www.rightmove.co.uk/properties/142142348 2 \n", "36 https://www.rightmove.co.uk/properties/128925950 2 \n", "700 https://www.rightmove.co.uk/properties/144591572 1 \n", "823 https://www.rightmove.co.uk/properties/145051769 2 \n", "214 https://www.rightmove.co.uk/properties/140326307 2 \n", "171 https://www.rightmove.co.uk/properties/139245428 2 \n", "598 https://www.rightmove.co.uk/properties/144034655 2 \n", "146 https://www.rightmove.co.uk/properties/138510395 2 \n", "711 https://www.rightmove.co.uk/properties/144620303 2 \n", "592 https://www.rightmove.co.uk/properties/143987669 2 \n", "624 https://www.rightmove.co.uk/properties/144217922 1 \n", "55 https://www.rightmove.co.uk/properties/132564737 2 \n", "851 https://www.rightmove.co.uk/properties/145172504 2 \n", "356 https://www.rightmove.co.uk/properties/142185623 2 \n", "748 https://www.rightmove.co.uk/properties/144771281 2 \n", "204 https://www.rightmove.co.uk/properties/140173319 2 \n", "654 https://www.rightmove.co.uk/properties/144361100 2 \n", "514 https://www.rightmove.co.uk/properties/143460365 1 \n", "762 https://www.rightmove.co.uk/properties/144846725 1 \n", "963 https://www.rightmove.co.uk/properties/145565252 2 \n", "938 https://www.rightmove.co.uk/properties/145491137 2 \n", "403 https://www.rightmove.co.uk/properties/142704416 2 \n", "302 https://www.rightmove.co.uk/properties/141595433 2 \n", "671 https://www.rightmove.co.uk/properties/144429140 2 \n", "349 https://www.rightmove.co.uk/properties/142115918 2 \n", "224 https://www.rightmove.co.uk/properties/140464481 2 \n", "740 https://www.rightmove.co.uk/properties/144722414 2 \n", "326 https://www.rightmove.co.uk/properties/141846023 2 \n", "319 https://www.rightmove.co.uk/properties/141797357 2 \n", "558 https://www.rightmove.co.uk/properties/143758763 2 \n", "712 https://www.rightmove.co.uk/properties/144622157 2 \n", "473 https://www.rightmove.co.uk/properties/143210102 2 \n", "105 https://www.rightmove.co.uk/properties/136988726 2 \n", "1003 https://www.rightmove.co.uk/properties/86775291 2 \n", "235 https://www.rightmove.co.uk/properties/140611055 2 \n", "65 https://www.rightmove.co.uk/properties/134116232 2 \n", "30 https://www.rightmove.co.uk/properties/127787960 1 \n", "1025 https://www.rightmove.co.uk/properties/86972726 2 \n", "88 https://www.rightmove.co.uk/properties/136012193 1 \n", "454 https://www.rightmove.co.uk/properties/143138867 2 \n", "343 https://www.rightmove.co.uk/properties/142032935 2 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2 = df[df.sqm_ocr > 80]\n", "df2.sort_values('price_per_sqm')" ] }, { "cell_type": "code", "execution_count": 6, "id": "52545cfa-0932-46fe-ba7e-961fd43f2786", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sqm_ocrpriceprice_per_sqmurlbedrooms
35973.40400000.05449.591281https://www.rightmove.co.uk/properties/1421869912
29377.00425000.05519.480519https://www.rightmove.co.uk/properties/1414377832
35285.50475000.05555.555556https://www.rightmove.co.uk/properties/1421423482
68576.03425000.05589.898724https://www.rightmove.co.uk/properties/1444940122
3682.90475000.05729.794934https://www.rightmove.co.uk/properties/1289259502
49171.68450000.06277.901786https://www.rightmove.co.uk/properties/1433158402
102073.67495000.06719.152980https://www.rightmove.co.uk/properties/868079162
\n", "
" ], "text/plain": [ " sqm_ocr price price_per_sqm \\\n", "359 73.40 400000.0 5449.591281 \n", "293 77.00 425000.0 5519.480519 \n", "352 85.50 475000.0 5555.555556 \n", "685 76.03 425000.0 5589.898724 \n", "36 82.90 475000.0 5729.794934 \n", "491 71.68 450000.0 6277.901786 \n", "1020 73.67 495000.0 6719.152980 \n", "\n", " url bedrooms \n", "359 https://www.rightmove.co.uk/properties/142186991 2 \n", "293 https://www.rightmove.co.uk/properties/141437783 2 \n", "352 https://www.rightmove.co.uk/properties/142142348 2 \n", "685 https://www.rightmove.co.uk/properties/144494012 2 \n", "36 https://www.rightmove.co.uk/properties/128925950 2 \n", "491 https://www.rightmove.co.uk/properties/143315840 2 \n", "1020 https://www.rightmove.co.uk/properties/86807916 2 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2 = df[(df.sqm_ocr > 70) & (df.price <500000)]\n", "df2.sort_values('price_per_sqm')" ] }, { "cell_type": "code", "execution_count": 7, "id": "d0246926-13ef-4110-8e3a-fb676a55c2a6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sqm_ocrpriceprice_per_sqmurlbedrooms
95374142.40550000.07.418157https://www.rightmove.co.uk/properties/1455465382
82380.40525000.06529.850746https://www.rightmove.co.uk/properties/1450517692
49273.90525000.07104.194858https://www.rightmove.co.uk/properties/1433173612
56176.70550000.07170.795306https://www.rightmove.co.uk/properties/1437807892
6973.10525000.07181.942544https://www.rightmove.co.uk/properties/1345745632
98872.40525000.07251.381215https://www.rightmove.co.uk/properties/866489252
27277.89575000.07382.205675https://www.rightmove.co.uk/properties/1411312972
52170.88525000.07406.884876https://www.rightmove.co.uk/properties/1435141492
32476.10575000.07555.847569https://www.rightmove.co.uk/properties/1418313532
101871.80550000.07660.167131https://www.rightmove.co.uk/properties/868048322
52678.00600000.07692.307692https://www.rightmove.co.uk/properties/1435521561
81771.17550000.07727.975270https://www.rightmove.co.uk/properties/1450359292
84377.50600000.07741.935484https://www.rightmove.co.uk/properties/1451449882
33770.60550000.07790.368272https://www.rightmove.co.uk/properties/1419042862
23375.70600000.07926.023778https://www.rightmove.co.uk/properties/1405822132
76375.00600000.08000.000000https://www.rightmove.co.uk/properties/1448620702
31571.30590000.08274.894811https://www.rightmove.co.uk/properties/1416696862
89971.47595000.08325.171401https://www.rightmove.co.uk/properties/1453629112
14771.50600000.08391.608392https://www.rightmove.co.uk/properties/1385375271
97370.89595000.08393.285372https://www.rightmove.co.uk/properties/862964912
\n", "
" ], "text/plain": [ " sqm_ocr price price_per_sqm \\\n", "953 74142.40 550000.0 7.418157 \n", "823 80.40 525000.0 6529.850746 \n", "492 73.90 525000.0 7104.194858 \n", "561 76.70 550000.0 7170.795306 \n", "69 73.10 525000.0 7181.942544 \n", "988 72.40 525000.0 7251.381215 \n", "272 77.89 575000.0 7382.205675 \n", "521 70.88 525000.0 7406.884876 \n", "324 76.10 575000.0 7555.847569 \n", "1018 71.80 550000.0 7660.167131 \n", "526 78.00 600000.0 7692.307692 \n", "817 71.17 550000.0 7727.975270 \n", "843 77.50 600000.0 7741.935484 \n", "337 70.60 550000.0 7790.368272 \n", "233 75.70 600000.0 7926.023778 \n", "763 75.00 600000.0 8000.000000 \n", "315 71.30 590000.0 8274.894811 \n", "899 71.47 595000.0 8325.171401 \n", "147 71.50 600000.0 8391.608392 \n", "973 70.89 595000.0 8393.285372 \n", "\n", " url bedrooms \n", "953 https://www.rightmove.co.uk/properties/145546538 2 \n", "823 https://www.rightmove.co.uk/properties/145051769 2 \n", "492 https://www.rightmove.co.uk/properties/143317361 2 \n", "561 https://www.rightmove.co.uk/properties/143780789 2 \n", "69 https://www.rightmove.co.uk/properties/134574563 2 \n", "988 https://www.rightmove.co.uk/properties/86648925 2 \n", "272 https://www.rightmove.co.uk/properties/141131297 2 \n", "521 https://www.rightmove.co.uk/properties/143514149 2 \n", "324 https://www.rightmove.co.uk/properties/141831353 2 \n", "1018 https://www.rightmove.co.uk/properties/86804832 2 \n", "526 https://www.rightmove.co.uk/properties/143552156 1 \n", "817 https://www.rightmove.co.uk/properties/145035929 2 \n", "843 https://www.rightmove.co.uk/properties/145144988 2 \n", "337 https://www.rightmove.co.uk/properties/141904286 2 \n", "233 https://www.rightmove.co.uk/properties/140582213 2 \n", "763 https://www.rightmove.co.uk/properties/144862070 2 \n", "315 https://www.rightmove.co.uk/properties/141669686 2 \n", "899 https://www.rightmove.co.uk/properties/145362911 2 \n", "147 https://www.rightmove.co.uk/properties/138537527 1 \n", "973 https://www.rightmove.co.uk/properties/86296491 2 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2 = df[(df.sqm_ocr > 70) & (df.price <=600000)& (df.price >500000)]\n", "df2.sort_values('price_per_sqm')" ] }, { "cell_type": "code", "execution_count": null, "id": "b9f2ca7b-8476-47f1-a34f-7acdfbe84bd1", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 15, "id": "edd9fa24-cad2-4448-9b17-c6d514564f41", "metadata": {}, "outputs": [], "source": [ "from data_access import Listing\n", "import pytesseract\n", "from PIL import Image\n", "import re" ] }, { "cell_type": "code", "execution_count": 18, "id": "002b2a3a-3ecc-45c1-8c2f-c143380ee0d5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[('3.14', 'm'), ('3.43', 'm'), ('3.89', 'm'), ('3.62', 'm'), ('3.89', 'm'), ('2.88', 'm'), ('75.3', 'sq. m'), ('4.07', 'm'), ('6.18', 'm'), ('2.79', 'm'), ('3.34', 'm'), ('2.79', 'm'), ('4.34', 'm'), ('1.76', 'm'), ('2.29', 'm'), ('1.76', 'm'), ('1.92', 'm'), ('75.3', 'sq. m')]\n", "-----\n", "Bedroom 2\n", "Bedroom 1 3.14m x 3.43m\n", "3.89m x 3.62m (10'4\" x 11'3\")\n", "(12'9\" x 11'10\")\n", "\n", "Bedroom 3\n", "3.89m x 2.88m\n", "(12'9\" x 9'5\")\n", "\n", "xt\n", "\n", "v\n", "\n", "Levita House NW1\n", "\n", "Approx. 75.3 sq. metres (810.2 sq. feet)\n", "\n", "Storage\n", "\n", ". Hall\n", "Kitchen 4.07m x 6.18m\n", "2.79m x 3.34m (13'4\" x 20'3\") Bedroom 4\n", "\n", "(9'2\" x 10'11\") 2.79m x 4.34m\n", "\n", "(9'2\" x 14'3\")\n", "\n", "Bathroom 1\n", "1.76m x 2.29m\n", "(5'9\" x 76\")\n", "\n", "Bathroom 2\n", "1.76m x 1.92m\n", "(5'9\" x 64\")\n", "\n", "Total area: approx. 75.3 sq. metres (810.2 sq. feet)\n", "\n" ] } ], "source": [ "l = Listing(144497822)\n", "for path in l.path_floorplan_folder().iterdir():\n", " img = Image.open(path)\n", " text = pytesseract.image_to_string(img)\n", " sqmregex = r'(\\d+\\.\\d*) ?(sq ?m|sq. ?m|m)'\n", " matches = re.findall(sqmregex, text.lower())\n", " print(matches)\n", " print(\"-----\")\n", " print(text)" ] }, { "cell_type": "code", "execution_count": null, "id": "015e870d-0cf0-4d07-a9ae-4e80d128b26c", "metadata": {}, "outputs": [], "source": [ "\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.0" } }, "nbformat": 4, "nbformat_minor": 5 }