From 335adc0856b00acacbe073541ba46578765586ee Mon Sep 17 00:00:00 2001 From: Kadir Date: Wed, 13 Mar 2024 16:24:57 +0000 Subject: [PATCH] add routing, incremental crawling, travel time, lease and development --- .../exploration-checkpoint.ipynb | 1712 ++++++++++++++++- crawler/1_dump_listings.py | 2 +- crawler/4_detect_floorplan.py | 2 +- crawler/data_access.py | 51 +- crawler/exploration.ipynb | 955 ++++++++- 5 files changed, 2652 insertions(+), 70 deletions(-) diff --git a/crawler/.ipynb_checkpoints/exploration-checkpoint.ipynb b/crawler/.ipynb_checkpoints/exploration-checkpoint.ipynb index 363fcab..480a582 100644 --- a/crawler/.ipynb_checkpoints/exploration-checkpoint.ipynb +++ b/crawler/.ipynb_checkpoints/exploration-checkpoint.ipynb @@ -1,6 +1,1714 @@ { - "cells": [], - "metadata": {}, + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "f20bddee-1e7c-4c46-a17a-c7bb6c13f30c", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/kadir/code/realestate/crawler/venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "from data_access import Listing\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "b1101088-9613-465f-81fd-79801e0202b8", + "metadata": {}, + "outputs": [], + "source": [ + "ls = Listing.get_all_listings()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "63e61601-7e3f-4d58-89f6-1794e4868cc3", + "metadata": {}, + "outputs": [], + "source": [ + "ds = [l.dict_nicely() for l in ls]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "1c222721-f426-42c0-9ac5-badc1f7a2034", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sqm_ocrpriceprice_per_sqmurlbedroomstravel_time_fastesttravel_time_secondlease_leftdevelopment
07.81350000.044814.340589https://www.rightmove.co.uk/properties/1023607731{'duration': 2695, 'distance': 6467, 'duration...{'duration': 1682, 'distance': 6810, 'duration...119.0False
1NaN400000.0NaNhttps://www.rightmove.co.uk/properties/1058368493{'duration': 2565, 'distance': 14070, 'duratio...{'duration': 2565, 'distance': 14070, 'duratio...NaNFalse
276.91400000.05200.884150https://www.rightmove.co.uk/properties/1072332143{'duration': 1714, 'distance': 9570, 'duration...{'duration': 1774, 'distance': 9570, 'duration...91.0False
3112.40800000.07117.437722https://www.rightmove.co.uk/properties/1079768963{'duration': 1862, 'distance': 8278, 'duration...{'duration': 1862, 'distance': 8278, 'duration...NaNFalse
4115.60775000.06704.152249https://www.rightmove.co.uk/properties/1154994413{'duration': 2943, 'distance': 7437, 'duration...{'duration': 2167, 'distance': 9920, 'duration...NaNFalse
..............................
2129NaN750000.0NaNhttps://www.rightmove.co.uk/properties/868135083{'duration': 2400, 'distance': 13983, 'duratio...{'duration': 2605, 'distance': 14702, 'duratio...NaNTrue
2130NaN655000.0NaNhttps://www.rightmove.co.uk/properties/868135233{'duration': 2400, 'distance': 13983, 'duratio...{'duration': 2605, 'distance': 14702, 'duratio...NaNTrue
213182.80550000.06642.512077https://www.rightmove.co.uk/properties/868146693{'duration': 2310, 'distance': 12972, 'duratio...{'duration': 2322, 'distance': 12104, 'duratio...0.0False
21325.52300000.054347.826087https://www.rightmove.co.uk/properties/869559583{'duration': 2332, 'distance': 6898, 'duration...{'duration': 2248, 'distance': 6893, 'duration...105.0False
213381.60790000.09681.372549https://www.rightmove.co.uk/properties/869727262{'duration': 1393, 'distance': 6390, 'duration...{'duration': 2733, 'distance': 6490, 'duration...993.0False
\n", + "

2134 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " sqm_ocr price price_per_sqm \\\n", + "0 7.81 350000.0 44814.340589 \n", + "1 NaN 400000.0 NaN \n", + "2 76.91 400000.0 5200.884150 \n", + "3 112.40 800000.0 7117.437722 \n", + "4 115.60 775000.0 6704.152249 \n", + "... ... ... ... \n", + "2129 NaN 750000.0 NaN \n", + "2130 NaN 655000.0 NaN \n", + "2131 82.80 550000.0 6642.512077 \n", + "2132 5.52 300000.0 54347.826087 \n", + "2133 81.60 790000.0 9681.372549 \n", + "\n", + " url bedrooms \\\n", + "0 https://www.rightmove.co.uk/properties/102360773 1 \n", + "1 https://www.rightmove.co.uk/properties/105836849 3 \n", + "2 https://www.rightmove.co.uk/properties/107233214 3 \n", + "3 https://www.rightmove.co.uk/properties/107976896 3 \n", + "4 https://www.rightmove.co.uk/properties/115499441 3 \n", + "... ... ... \n", + "2129 https://www.rightmove.co.uk/properties/86813508 3 \n", + "2130 https://www.rightmove.co.uk/properties/86813523 3 \n", + "2131 https://www.rightmove.co.uk/properties/86814669 3 \n", + "2132 https://www.rightmove.co.uk/properties/86955958 3 \n", + "2133 https://www.rightmove.co.uk/properties/86972726 2 \n", + "\n", + " travel_time_fastest \\\n", + "0 {'duration': 2695, 'distance': 6467, 'duration... \n", + "1 {'duration': 2565, 'distance': 14070, 'duratio... \n", + "2 {'duration': 1714, 'distance': 9570, 'duration... \n", + "3 {'duration': 1862, 'distance': 8278, 'duration... \n", + "4 {'duration': 2943, 'distance': 7437, 'duration... \n", + "... ... \n", + "2129 {'duration': 2400, 'distance': 13983, 'duratio... \n", + "2130 {'duration': 2400, 'distance': 13983, 'duratio... \n", + "2131 {'duration': 2310, 'distance': 12972, 'duratio... \n", + "2132 {'duration': 2332, 'distance': 6898, 'duration... \n", + "2133 {'duration': 1393, 'distance': 6390, 'duration... \n", + "\n", + " travel_time_second lease_left \\\n", + "0 {'duration': 1682, 'distance': 6810, 'duration... 119.0 \n", + "1 {'duration': 2565, 'distance': 14070, 'duratio... NaN \n", + "2 {'duration': 1774, 'distance': 9570, 'duration... 91.0 \n", + "3 {'duration': 1862, 'distance': 8278, 'duration... NaN \n", + "4 {'duration': 2167, 'distance': 9920, 'duration... NaN \n", + "... ... ... \n", + "2129 {'duration': 2605, 'distance': 14702, 'duratio... NaN \n", + "2130 {'duration': 2605, 'distance': 14702, 'duratio... NaN \n", + "2131 {'duration': 2322, 'distance': 12104, 'duratio... 0.0 \n", + "2132 {'duration': 2248, 'distance': 6893, 'duration... 105.0 \n", + "2133 {'duration': 2733, 'distance': 6490, 'duration... 993.0 \n", + "\n", + " development \n", + "0 False \n", + "1 False \n", + "2 False \n", + "3 False \n", + "4 False \n", + "... ... \n", + "2129 True \n", + "2130 True \n", + "2131 False \n", + "2132 False \n", + "2133 False \n", + "\n", + "[2134 rows x 9 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(ds)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "387c48d9-26c6-4bed-8201-352735c06acb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1682.0\n", + "1 2565.0\n", + "2 1714.0\n", + "3 1862.0\n", + "4 2167.0\n", + " ... \n", + "2129 2400.0\n", + "2130 2400.0\n", + "2131 2310.0\n", + "2132 2248.0\n", + "2133 1393.0\n", + "Length: 2134, dtype: float64" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "7b37ad6b-9b0a-444e-b8c3-6fe4e43e42cb", + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/var/folders/wl/kx43lvyn6yv7lq988gwrkq_m0000gn/T/ipykernel_33778/1787981707.py\u001b[0m in \u001b[0;36m?\u001b[0;34m()\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0ms2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'travel_time_second'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSeries\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdropcolumns\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0ms2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'b_'\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mc\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mc\u001b[0m \u001b[0;32min\u001b[0m \u001b[0ms2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mdf2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'travel_time_fastest'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'travel_time_second'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0ms1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0ms2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0mdf2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'min_duration'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0ma_duration\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdf2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mb_duration\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0mdf2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/code/realestate/crawler/venv/lib/python3.12/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1574\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mfinal\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1575\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__nonzero__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mNoReturn\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1576\u001b[0;31m raise ValueError(\n\u001b[0m\u001b[1;32m 1577\u001b[0m \u001b[0;34mf\"\u001b[0m\u001b[0;34mThe truth value of a \u001b[0m\u001b[0;34m{\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m is ambiguous. \u001b[0m\u001b[0;34m\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1578\u001b[0m \u001b[0;34m\"Use a.empty, a.bool(), a.item(), a.any() or a.all().\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1579\u001b[0m \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all()." + ] + } + ], + "source": [ + "dropcolumns = ['distance_per_transit', 'duration_static', 'distance']\n", + "s1 = df['travel_time_fastest'].apply(pd.Series).drop(dropcolumns, axis=1)\n", + "s1.columns = ['a_' + c for c in s1.columns]\n", + "\n", + "s2 = df['travel_time_second'].apply(pd.Series).drop(dropcolumns, axis=1)\n", + "s2.columns = ['b_' + c for c in s2.columns]\n", + "\n", + "df2 = pd.concat([df.drop(['travel_time_fastest', 'travel_time_second'], axis=1), s1, s2], axis=1)\n", + "df2.loc[:, 'min_duration'] = df2.loc[:, ['a_duration', 'b_duration']].min(axis=1)\n", + "df2" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "8c75aaa6-6113-482f-809b-11e405510184", + "metadata": {}, + "outputs": [], + "source": [ + "df2.to_clipboard()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "79f99692-91e8-4915-9b57-7b3a1efd7d3a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sqm_ocrpriceprice_per_sqmurlbedroomsdurationdistanceduration_staticduration_per_transitdistance_per_transitnumber_of_transit_stopsdurationdistanceduration_staticduration_per_transitdistance_per_transitnumber_of_transit_stops
07.81350000.044814.340589https://www.rightmove.co.uk/properties/1023607731269564672695{'WALK': 414, 'TRANSIT': 2280}{'WALK': 465, 'TRANSIT': 6002}11682.06810.01682.0{'WALK': 608, 'TRANSIT': 804}{'WALK': 582, 'TRANSIT': 6228}2.0
1NaN400000.0NaNhttps://www.rightmove.co.uk/properties/10583684932565140702565{'WALK': 750, 'TRANSIT': 1800}{'WALK': 856, 'TRANSIT': 13214}12565.014070.02565.0{'WALK': 750, 'TRANSIT': 1800}{'WALK': 856, 'TRANSIT': 13214}1.0
276.91400000.05200.884150https://www.rightmove.co.uk/properties/1072332143171495701714{'WALK': 903, 'TRANSIT': 780}{'WALK': 1035, 'TRANSIT': 8535}11774.09570.01774.0{'WALK': 903, 'TRANSIT': 840}{'WALK': 1035, 'TRANSIT': 8535}1.0
3112.40800000.07117.437722https://www.rightmove.co.uk/properties/1079768963186282781862{'WALK': 635, 'TRANSIT': 900}{'WALK': 710, 'TRANSIT': 7568}21862.08278.01862.0{'WALK': 635, 'TRANSIT': 960}{'WALK': 710, 'TRANSIT': 7568}2.0
4115.60775000.06704.152249https://www.rightmove.co.uk/properties/1154994413294374372943{'WALK': 242, 'TRANSIT': 2700}{'WALK': 276, 'TRANSIT': 7161}12167.09920.02167.0{'WALK': 658, 'TRANSIT': 1200}{'WALK': 720, 'TRANSIT': 9200}2.0
......................................................
2046NaN750000.0NaNhttps://www.rightmove.co.uk/properties/8681350832400139832400{'WALK': 603, 'TRANSIT': 1524}{'WALK': 671, 'TRANSIT': 13312}22605.014702.02605.0{'WALK': 1467, 'TRANSIT': 1132}{'WALK': 1698, 'TRANSIT': 13004}1.0
2047NaN655000.0NaNhttps://www.rightmove.co.uk/properties/8681352332400139832400{'WALK': 603, 'TRANSIT': 1524}{'WALK': 671, 'TRANSIT': 13312}22605.014702.02605.0{'WALK': 1467, 'TRANSIT': 1132}{'WALK': 1698, 'TRANSIT': 13004}1.0
204882.80550000.06642.512077https://www.rightmove.co.uk/properties/8681466932310129722310{'WALK': 786, 'TRANSIT': 1277}{'WALK': 885, 'TRANSIT': 12087}22322.012104.02322.0{'WALK': 991, 'TRANSIT': 1080}{'WALK': 1089, 'TRANSIT': 11015}2.0
20495.52300000.054347.826087https://www.rightmove.co.uk/properties/869559583233268982332{'WALK': 1671, 'TRANSIT': 660}{'WALK': 1945, 'TRANSIT': 4953}12248.06893.02248.0{'WALK': 1671, 'TRANSIT': 576}{'WALK': 1945, 'TRANSIT': 4948}1.0
205081.60790000.09681.372549https://www.rightmove.co.uk/properties/869727262139363901393{'WALK': 518, 'TRANSIT': 564}{'WALK': 441, 'TRANSIT': 5949}22733.06490.02733.0{'WALK': 451, 'TRANSIT': 2280}{'WALK': 488, 'TRANSIT': 6002}1.0
\n", + "

2051 rows × 17 columns

\n", + "
" + ], + "text/plain": [ + " sqm_ocr price price_per_sqm \\\n", + "0 7.81 350000.0 44814.340589 \n", + "1 NaN 400000.0 NaN \n", + "2 76.91 400000.0 5200.884150 \n", + "3 112.40 800000.0 7117.437722 \n", + "4 115.60 775000.0 6704.152249 \n", + "... ... ... ... \n", + "2046 NaN 750000.0 NaN \n", + "2047 NaN 655000.0 NaN \n", + "2048 82.80 550000.0 6642.512077 \n", + "2049 5.52 300000.0 54347.826087 \n", + "2050 81.60 790000.0 9681.372549 \n", + "\n", + " url bedrooms duration \\\n", + "0 https://www.rightmove.co.uk/properties/102360773 1 2695 \n", + "1 https://www.rightmove.co.uk/properties/105836849 3 2565 \n", + "2 https://www.rightmove.co.uk/properties/107233214 3 1714 \n", + "3 https://www.rightmove.co.uk/properties/107976896 3 1862 \n", + "4 https://www.rightmove.co.uk/properties/115499441 3 2943 \n", + "... ... ... ... \n", + "2046 https://www.rightmove.co.uk/properties/86813508 3 2400 \n", + "2047 https://www.rightmove.co.uk/properties/86813523 3 2400 \n", + "2048 https://www.rightmove.co.uk/properties/86814669 3 2310 \n", + "2049 https://www.rightmove.co.uk/properties/86955958 3 2332 \n", + "2050 https://www.rightmove.co.uk/properties/86972726 2 1393 \n", + "\n", + " distance duration_static duration_per_transit \\\n", + "0 6467 2695 {'WALK': 414, 'TRANSIT': 2280} \n", + "1 14070 2565 {'WALK': 750, 'TRANSIT': 1800} \n", + "2 9570 1714 {'WALK': 903, 'TRANSIT': 780} \n", + "3 8278 1862 {'WALK': 635, 'TRANSIT': 900} \n", + "4 7437 2943 {'WALK': 242, 'TRANSIT': 2700} \n", + "... ... ... ... \n", + "2046 13983 2400 {'WALK': 603, 'TRANSIT': 1524} \n", + "2047 13983 2400 {'WALK': 603, 'TRANSIT': 1524} \n", + "2048 12972 2310 {'WALK': 786, 'TRANSIT': 1277} \n", + "2049 6898 2332 {'WALK': 1671, 'TRANSIT': 660} \n", + "2050 6390 1393 {'WALK': 518, 'TRANSIT': 564} \n", + "\n", + " distance_per_transit number_of_transit_stops duration \\\n", + "0 {'WALK': 465, 'TRANSIT': 6002} 1 1682.0 \n", + "1 {'WALK': 856, 'TRANSIT': 13214} 1 2565.0 \n", + "2 {'WALK': 1035, 'TRANSIT': 8535} 1 1774.0 \n", + "3 {'WALK': 710, 'TRANSIT': 7568} 2 1862.0 \n", + "4 {'WALK': 276, 'TRANSIT': 7161} 1 2167.0 \n", + "... ... ... ... \n", + "2046 {'WALK': 671, 'TRANSIT': 13312} 2 2605.0 \n", + "2047 {'WALK': 671, 'TRANSIT': 13312} 2 2605.0 \n", + "2048 {'WALK': 885, 'TRANSIT': 12087} 2 2322.0 \n", + "2049 {'WALK': 1945, 'TRANSIT': 4953} 1 2248.0 \n", + "2050 {'WALK': 441, 'TRANSIT': 5949} 2 2733.0 \n", + "\n", + " distance duration_static duration_per_transit \\\n", + "0 6810.0 1682.0 {'WALK': 608, 'TRANSIT': 804} \n", + "1 14070.0 2565.0 {'WALK': 750, 'TRANSIT': 1800} \n", + "2 9570.0 1774.0 {'WALK': 903, 'TRANSIT': 840} \n", + "3 8278.0 1862.0 {'WALK': 635, 'TRANSIT': 960} \n", + "4 9920.0 2167.0 {'WALK': 658, 'TRANSIT': 1200} \n", + "... ... ... ... \n", + "2046 14702.0 2605.0 {'WALK': 1467, 'TRANSIT': 1132} \n", + "2047 14702.0 2605.0 {'WALK': 1467, 'TRANSIT': 1132} \n", + "2048 12104.0 2322.0 {'WALK': 991, 'TRANSIT': 1080} \n", + "2049 6893.0 2248.0 {'WALK': 1671, 'TRANSIT': 576} \n", + "2050 6490.0 2733.0 {'WALK': 451, 'TRANSIT': 2280} \n", + "\n", + " distance_per_transit number_of_transit_stops \n", + "0 {'WALK': 582, 'TRANSIT': 6228} 2.0 \n", + "1 {'WALK': 856, 'TRANSIT': 13214} 1.0 \n", + "2 {'WALK': 1035, 'TRANSIT': 8535} 1.0 \n", + "3 {'WALK': 710, 'TRANSIT': 7568} 2.0 \n", + "4 {'WALK': 720, 'TRANSIT': 9200} 2.0 \n", + "... ... ... \n", + "2046 {'WALK': 1698, 'TRANSIT': 13004} 1.0 \n", + "2047 {'WALK': 1698, 'TRANSIT': 13004} 1.0 \n", + "2048 {'WALK': 1089, 'TRANSIT': 11015} 2.0 \n", + "2049 {'WALK': 1945, 'TRANSIT': 4948} 1.0 \n", + "2050 {'WALK': 488, 'TRANSIT': 6002} 1.0 \n", + "\n", + "[2051 rows x 17 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.concat([df.drop(['travel_time_fastest'], axis=1), df['travel_time_fastest'].apply(pd.Series)], axis=1)\n", + "df = pd.concat([df.drop(['travel_time_second'], axis=1), df['travel_time_second'].apply(pd.Series)], axis=1)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "abcbde40-7432-4449-957a-79ce2ca126eb", + "metadata": {}, + "outputs": [], + "source": [ + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "99c5b304-3d13-466b-a9f5-83a5db6311b5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sqm_ocrpriceprice_per_sqmurlbedrooms
95374142.40550000.07.418157https://www.rightmove.co.uk/properties/1455465382
24921850.10725000.033.180626https://www.rightmove.co.uk/properties/1408217362
177137.50695000.05054.545455https://www.rightmove.co.uk/properties/1393542592
35285.50475000.05555.555556https://www.rightmove.co.uk/properties/1421423482
3682.90475000.05729.794934https://www.rightmove.co.uk/properties/1289259502
700101.00650000.06435.643564https://www.rightmove.co.uk/properties/1445915721
82380.40525000.06529.850746https://www.rightmove.co.uk/properties/1450517692
21491.51650000.07103.048847https://www.rightmove.co.uk/properties/1403263072
17191.10650000.07135.016465https://www.rightmove.co.uk/properties/1392454282
598109.40795000.07266.910420https://www.rightmove.co.uk/properties/1440346552
14693.78700000.07464.278098https://www.rightmove.co.uk/properties/1385103952
711100.31750000.07476.821852https://www.rightmove.co.uk/properties/1446203032
59286.49650000.07515.319690https://www.rightmove.co.uk/properties/1439876692
624101.35775000.07646.768624https://www.rightmove.co.uk/properties/1442179221
5597.60750000.07684.426230https://www.rightmove.co.uk/properties/1325647372
85181.01625000.07715.096902https://www.rightmove.co.uk/properties/1451725042
35689.00695000.07808.988764https://www.rightmove.co.uk/properties/1421856232
74885.90675000.07857.974389https://www.rightmove.co.uk/properties/1447712812
20482.40650000.07888.349515https://www.rightmove.co.uk/properties/1401733192
65483.33665000.07980.319213https://www.rightmove.co.uk/properties/1443611002
51487.50700000.08000.000000https://www.rightmove.co.uk/properties/1434603651
76292.90750000.08073.196986https://www.rightmove.co.uk/properties/1448467251
96392.90750000.08073.196986https://www.rightmove.co.uk/properties/1455652522
93886.10699000.08118.466899https://www.rightmove.co.uk/properties/1454911372
40390.60750000.08278.145695https://www.rightmove.co.uk/properties/1427044162
30293.30795000.08520.900322https://www.rightmove.co.uk/properties/1415954332
67193.27800000.08577.248847https://www.rightmove.co.uk/properties/1444291402
34980.57695000.08626.039469https://www.rightmove.co.uk/properties/1421159182
22490.10800000.08879.023307https://www.rightmove.co.uk/properties/1404644812
74081.70735000.08996.328029https://www.rightmove.co.uk/properties/1447224142
32687.30800000.09163.802978https://www.rightmove.co.uk/properties/1418460232
31981.57750000.09194.556822https://www.rightmove.co.uk/properties/1417973572
55886.77800000.09219.776420https://www.rightmove.co.uk/properties/1437587632
71286.30800000.09269.988413https://www.rightmove.co.uk/properties/1446221572
47386.00800000.09302.325581https://www.rightmove.co.uk/properties/1432101022
10580.40750000.09328.358209https://www.rightmove.co.uk/properties/1369887262
100380.27750000.09343.465803https://www.rightmove.co.uk/properties/867752912
23582.80775000.09359.903382https://www.rightmove.co.uk/properties/1406110552
6585.10800000.09400.705053https://www.rightmove.co.uk/properties/1341162322
3083.70795000.09498.207885https://www.rightmove.co.uk/properties/1277879601
102581.60790000.09681.372549https://www.rightmove.co.uk/properties/869727262
8881.75800000.09785.932722https://www.rightmove.co.uk/properties/1360121931
45480.80800000.09900.990099https://www.rightmove.co.uk/properties/1431388672
34380.64800000.09920.634921https://www.rightmove.co.uk/properties/1420329352
\n", + "
" + ], + "text/plain": [ + " sqm_ocr price price_per_sqm \\\n", + "953 74142.40 550000.0 7.418157 \n", + "249 21850.10 725000.0 33.180626 \n", + "177 137.50 695000.0 5054.545455 \n", + "352 85.50 475000.0 5555.555556 \n", + "36 82.90 475000.0 5729.794934 \n", + "700 101.00 650000.0 6435.643564 \n", + "823 80.40 525000.0 6529.850746 \n", + "214 91.51 650000.0 7103.048847 \n", + "171 91.10 650000.0 7135.016465 \n", + "598 109.40 795000.0 7266.910420 \n", + "146 93.78 700000.0 7464.278098 \n", + "711 100.31 750000.0 7476.821852 \n", + "592 86.49 650000.0 7515.319690 \n", + "624 101.35 775000.0 7646.768624 \n", + "55 97.60 750000.0 7684.426230 \n", + "851 81.01 625000.0 7715.096902 \n", + "356 89.00 695000.0 7808.988764 \n", + "748 85.90 675000.0 7857.974389 \n", + "204 82.40 650000.0 7888.349515 \n", + "654 83.33 665000.0 7980.319213 \n", + "514 87.50 700000.0 8000.000000 \n", + "762 92.90 750000.0 8073.196986 \n", + "963 92.90 750000.0 8073.196986 \n", + "938 86.10 699000.0 8118.466899 \n", + "403 90.60 750000.0 8278.145695 \n", + "302 93.30 795000.0 8520.900322 \n", + "671 93.27 800000.0 8577.248847 \n", + "349 80.57 695000.0 8626.039469 \n", + "224 90.10 800000.0 8879.023307 \n", + "740 81.70 735000.0 8996.328029 \n", + "326 87.30 800000.0 9163.802978 \n", + "319 81.57 750000.0 9194.556822 \n", + "558 86.77 800000.0 9219.776420 \n", + "712 86.30 800000.0 9269.988413 \n", + "473 86.00 800000.0 9302.325581 \n", + "105 80.40 750000.0 9328.358209 \n", + "1003 80.27 750000.0 9343.465803 \n", + "235 82.80 775000.0 9359.903382 \n", + "65 85.10 800000.0 9400.705053 \n", + "30 83.70 795000.0 9498.207885 \n", + "1025 81.60 790000.0 9681.372549 \n", + "88 81.75 800000.0 9785.932722 \n", + "454 80.80 800000.0 9900.990099 \n", + "343 80.64 800000.0 9920.634921 \n", + "\n", + " url bedrooms \n", + "953 https://www.rightmove.co.uk/properties/145546538 2 \n", + "249 https://www.rightmove.co.uk/properties/140821736 2 \n", + "177 https://www.rightmove.co.uk/properties/139354259 2 \n", + "352 https://www.rightmove.co.uk/properties/142142348 2 \n", + "36 https://www.rightmove.co.uk/properties/128925950 2 \n", + "700 https://www.rightmove.co.uk/properties/144591572 1 \n", + "823 https://www.rightmove.co.uk/properties/145051769 2 \n", + "214 https://www.rightmove.co.uk/properties/140326307 2 \n", + "171 https://www.rightmove.co.uk/properties/139245428 2 \n", + "598 https://www.rightmove.co.uk/properties/144034655 2 \n", + "146 https://www.rightmove.co.uk/properties/138510395 2 \n", + "711 https://www.rightmove.co.uk/properties/144620303 2 \n", + "592 https://www.rightmove.co.uk/properties/143987669 2 \n", + "624 https://www.rightmove.co.uk/properties/144217922 1 \n", + "55 https://www.rightmove.co.uk/properties/132564737 2 \n", + "851 https://www.rightmove.co.uk/properties/145172504 2 \n", + "356 https://www.rightmove.co.uk/properties/142185623 2 \n", + "748 https://www.rightmove.co.uk/properties/144771281 2 \n", + "204 https://www.rightmove.co.uk/properties/140173319 2 \n", + "654 https://www.rightmove.co.uk/properties/144361100 2 \n", + "514 https://www.rightmove.co.uk/properties/143460365 1 \n", + "762 https://www.rightmove.co.uk/properties/144846725 1 \n", + "963 https://www.rightmove.co.uk/properties/145565252 2 \n", + "938 https://www.rightmove.co.uk/properties/145491137 2 \n", + "403 https://www.rightmove.co.uk/properties/142704416 2 \n", + "302 https://www.rightmove.co.uk/properties/141595433 2 \n", + "671 https://www.rightmove.co.uk/properties/144429140 2 \n", + "349 https://www.rightmove.co.uk/properties/142115918 2 \n", + "224 https://www.rightmove.co.uk/properties/140464481 2 \n", + "740 https://www.rightmove.co.uk/properties/144722414 2 \n", + "326 https://www.rightmove.co.uk/properties/141846023 2 \n", + "319 https://www.rightmove.co.uk/properties/141797357 2 \n", + "558 https://www.rightmove.co.uk/properties/143758763 2 \n", + "712 https://www.rightmove.co.uk/properties/144622157 2 \n", + "473 https://www.rightmove.co.uk/properties/143210102 2 \n", + "105 https://www.rightmove.co.uk/properties/136988726 2 \n", + "1003 https://www.rightmove.co.uk/properties/86775291 2 \n", + "235 https://www.rightmove.co.uk/properties/140611055 2 \n", + "65 https://www.rightmove.co.uk/properties/134116232 2 \n", + "30 https://www.rightmove.co.uk/properties/127787960 1 \n", + "1025 https://www.rightmove.co.uk/properties/86972726 2 \n", + "88 https://www.rightmove.co.uk/properties/136012193 1 \n", + "454 https://www.rightmove.co.uk/properties/143138867 2 \n", + "343 https://www.rightmove.co.uk/properties/142032935 2 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2 = df[df.sqm_ocr > 80]\n", + "df2.sort_values('price_per_sqm')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "52545cfa-0932-46fe-ba7e-961fd43f2786", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sqm_ocrpriceprice_per_sqmurlbedrooms
35973.40400000.05449.591281https://www.rightmove.co.uk/properties/1421869912
29377.00425000.05519.480519https://www.rightmove.co.uk/properties/1414377832
35285.50475000.05555.555556https://www.rightmove.co.uk/properties/1421423482
68576.03425000.05589.898724https://www.rightmove.co.uk/properties/1444940122
3682.90475000.05729.794934https://www.rightmove.co.uk/properties/1289259502
49171.68450000.06277.901786https://www.rightmove.co.uk/properties/1433158402
102073.67495000.06719.152980https://www.rightmove.co.uk/properties/868079162
\n", + "
" + ], + "text/plain": [ + " sqm_ocr price price_per_sqm \\\n", + "359 73.40 400000.0 5449.591281 \n", + "293 77.00 425000.0 5519.480519 \n", + "352 85.50 475000.0 5555.555556 \n", + "685 76.03 425000.0 5589.898724 \n", + "36 82.90 475000.0 5729.794934 \n", + "491 71.68 450000.0 6277.901786 \n", + "1020 73.67 495000.0 6719.152980 \n", + "\n", + " url bedrooms \n", + "359 https://www.rightmove.co.uk/properties/142186991 2 \n", + "293 https://www.rightmove.co.uk/properties/141437783 2 \n", + "352 https://www.rightmove.co.uk/properties/142142348 2 \n", + "685 https://www.rightmove.co.uk/properties/144494012 2 \n", + "36 https://www.rightmove.co.uk/properties/128925950 2 \n", + "491 https://www.rightmove.co.uk/properties/143315840 2 \n", + "1020 https://www.rightmove.co.uk/properties/86807916 2 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2 = df[(df.sqm_ocr > 70) & (df.price <500000)]\n", + "df2.sort_values('price_per_sqm')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "d0246926-13ef-4110-8e3a-fb676a55c2a6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sqm_ocrpriceprice_per_sqmurlbedrooms
95374142.40550000.07.418157https://www.rightmove.co.uk/properties/1455465382
82380.40525000.06529.850746https://www.rightmove.co.uk/properties/1450517692
49273.90525000.07104.194858https://www.rightmove.co.uk/properties/1433173612
56176.70550000.07170.795306https://www.rightmove.co.uk/properties/1437807892
6973.10525000.07181.942544https://www.rightmove.co.uk/properties/1345745632
98872.40525000.07251.381215https://www.rightmove.co.uk/properties/866489252
27277.89575000.07382.205675https://www.rightmove.co.uk/properties/1411312972
52170.88525000.07406.884876https://www.rightmove.co.uk/properties/1435141492
32476.10575000.07555.847569https://www.rightmove.co.uk/properties/1418313532
101871.80550000.07660.167131https://www.rightmove.co.uk/properties/868048322
52678.00600000.07692.307692https://www.rightmove.co.uk/properties/1435521561
81771.17550000.07727.975270https://www.rightmove.co.uk/properties/1450359292
84377.50600000.07741.935484https://www.rightmove.co.uk/properties/1451449882
33770.60550000.07790.368272https://www.rightmove.co.uk/properties/1419042862
23375.70600000.07926.023778https://www.rightmove.co.uk/properties/1405822132
76375.00600000.08000.000000https://www.rightmove.co.uk/properties/1448620702
31571.30590000.08274.894811https://www.rightmove.co.uk/properties/1416696862
89971.47595000.08325.171401https://www.rightmove.co.uk/properties/1453629112
14771.50600000.08391.608392https://www.rightmove.co.uk/properties/1385375271
97370.89595000.08393.285372https://www.rightmove.co.uk/properties/862964912
\n", + "
" + ], + "text/plain": [ + " sqm_ocr price price_per_sqm \\\n", + "953 74142.40 550000.0 7.418157 \n", + "823 80.40 525000.0 6529.850746 \n", + "492 73.90 525000.0 7104.194858 \n", + "561 76.70 550000.0 7170.795306 \n", + "69 73.10 525000.0 7181.942544 \n", + "988 72.40 525000.0 7251.381215 \n", + "272 77.89 575000.0 7382.205675 \n", + "521 70.88 525000.0 7406.884876 \n", + "324 76.10 575000.0 7555.847569 \n", + "1018 71.80 550000.0 7660.167131 \n", + "526 78.00 600000.0 7692.307692 \n", + "817 71.17 550000.0 7727.975270 \n", + "843 77.50 600000.0 7741.935484 \n", + "337 70.60 550000.0 7790.368272 \n", + "233 75.70 600000.0 7926.023778 \n", + "763 75.00 600000.0 8000.000000 \n", + "315 71.30 590000.0 8274.894811 \n", + "899 71.47 595000.0 8325.171401 \n", + "147 71.50 600000.0 8391.608392 \n", + "973 70.89 595000.0 8393.285372 \n", + "\n", + " url bedrooms \n", + "953 https://www.rightmove.co.uk/properties/145546538 2 \n", + "823 https://www.rightmove.co.uk/properties/145051769 2 \n", + "492 https://www.rightmove.co.uk/properties/143317361 2 \n", + "561 https://www.rightmove.co.uk/properties/143780789 2 \n", + "69 https://www.rightmove.co.uk/properties/134574563 2 \n", + "988 https://www.rightmove.co.uk/properties/86648925 2 \n", + "272 https://www.rightmove.co.uk/properties/141131297 2 \n", + "521 https://www.rightmove.co.uk/properties/143514149 2 \n", + "324 https://www.rightmove.co.uk/properties/141831353 2 \n", + "1018 https://www.rightmove.co.uk/properties/86804832 2 \n", + "526 https://www.rightmove.co.uk/properties/143552156 1 \n", + "817 https://www.rightmove.co.uk/properties/145035929 2 \n", + "843 https://www.rightmove.co.uk/properties/145144988 2 \n", + "337 https://www.rightmove.co.uk/properties/141904286 2 \n", + "233 https://www.rightmove.co.uk/properties/140582213 2 \n", + "763 https://www.rightmove.co.uk/properties/144862070 2 \n", + "315 https://www.rightmove.co.uk/properties/141669686 2 \n", + "899 https://www.rightmove.co.uk/properties/145362911 2 \n", + "147 https://www.rightmove.co.uk/properties/138537527 1 \n", + "973 https://www.rightmove.co.uk/properties/86296491 2 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2 = df[(df.sqm_ocr > 70) & (df.price <=600000)& (df.price >500000)]\n", + "df2.sort_values('price_per_sqm')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b9f2ca7b-8476-47f1-a34f-7acdfbe84bd1", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "edd9fa24-cad2-4448-9b17-c6d514564f41", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sqm_ocrpriceprice_per_sqmurlbedrooms
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [sqm_ocr, price, price_per_sqm, url, bedrooms]\n", + "Index: []" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.bedrooms > 2]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "002b2a3a-3ecc-45c1-8c2f-c143380ee0d5", + "metadata": {}, + "outputs": [], + "source": [ + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.0" + } + }, "nbformat": 4, "nbformat_minor": 5 } diff --git a/crawler/1_dump_listings.py b/crawler/1_dump_listings.py index 24f5cb4..cc2aa53 100644 --- a/crawler/1_dump_listings.py +++ b/crawler/1_dump_listings.py @@ -9,7 +9,7 @@ folder = pathlib.Path("data/rs/") for i in range(1, 10000): try: print(f"page {i}") - d = listing_query(i, 3, 3, 15, 0, 800000) + d = listing_query(i, 3, 3, 15, 0, 800000, max_days_since_added=3) except: break diff --git a/crawler/4_detect_floorplan.py b/crawler/4_detect_floorplan.py index fbbc818..388f2d4 100644 --- a/crawler/4_detect_floorplan.py +++ b/crawler/4_detect_floorplan.py @@ -6,4 +6,4 @@ listings = Listing.get_all_listings() for listing in tqdm(listings): tqdm.write(str(listing.identifier)) # listing.calculate_sqm_model() # using google/deplot model. Too slow, rather use tesseract - listing.calculate_sqm_ocr() + listing.calculate_sqm_ocr(recalculate=False) diff --git a/crawler/data_access.py b/crawler/data_access.py index d3faaaa..5d809e5 100644 --- a/crawler/data_access.py +++ b/crawler/data_access.py @@ -2,7 +2,8 @@ from dataclasses import dataclass import json import pathlib from typing import List, Dict -from rec import floorplan +from rec import floorplan, routing +import re _DATA_DIR = pathlib.Path('data/rs/') @@ -34,6 +35,9 @@ class Listing(): def path_detail_json(self) -> pathlib.Path: return self.path_listing() / 'detail.json' + def path_routing_json(self) -> pathlib.Path: + return self.path_listing() / 'routing.json' + def path_floorplan_model_json(self) -> pathlib.Path: return self.path_listing() / 'floorplan_model.json' @@ -115,6 +119,24 @@ class Listing(): max_sqm = max(sqms) return max_sqm + def calculate_route(self, dest_lat: float, dest_lon: float, recalculate=False): + if self.path_routing_json().exists() and not recalculate: + return + + result = routing.transit_route(self.latitude, self.longitude, dest_lat, dest_lon) + with open(self.path_routing_json(), 'w') as f: + json.dump(result, f) + + @property + def travel_time(self) -> List: + if not self.path_routing_json().exists(): + return [] + with open(self.path_routing_json()) as f: + d = json.load(f) + + return routing.extract_time(d) + + @property def url(self): return f'https://www.rightmove.co.uk/properties/{self.identifier}' @@ -140,6 +162,29 @@ class Listing(): def bedrooms(self) -> int: return self.detailobject['property']['bedrooms'] + @property + def latitude(self) -> float: + return self.detailobject['property']['latitude'] + + @property + def longitude(self) -> float: + return self.detailobject['property']['longitude'] + + @property + def leaseLeft(self) -> int: + ds = self.detailobject['property'].get('tenureInfo', {}).get('content', []) + for d in ds: + if d['type'] == 'lengthOfLease': + matches = re.findall(r'(\d+\.?\d*)', d['value']) + if len(matches): + return float(matches[0]) + return None + + @property + def development(self) -> bool: + # aka new home + return self.detailobject['property']['development'] + def dict_nicely(self): return { 'sqm_ocr': self.sqm_ocr, @@ -147,6 +192,10 @@ class Listing(): 'price_per_sqm': self.price_per_sqm, 'url': self.url, 'bedrooms': self.bedrooms, + 'travel_time_fastest': self.travel_time[0], + 'travel_time_second': None if len(self.travel_time) < 2 else self.travel_time[1], + 'lease_left': self.leaseLeft, + 'development': self.development, } diff --git a/crawler/exploration.ipynb b/crawler/exploration.ipynb index 65bd0bb..5912de3 100644 --- a/crawler/exploration.ipynb +++ b/crawler/exploration.ipynb @@ -72,6 +72,10 @@ " price_per_sqm\n", " url\n", " bedrooms\n", + " travel_time_fastest\n", + " travel_time_second\n", + " lease_left\n", + " development\n", " \n", " \n", " \n", @@ -82,38 +86,58 @@ " 44814.340589\n", " https://www.rightmove.co.uk/properties/102360773\n", " 1\n", + " {'duration': 2695, 'distance': 6467, 'duration...\n", + " {'duration': 1682, 'distance': 6810, 'duration...\n", + " 119.0\n", + " False\n", " \n", " \n", " 1\n", - " 5.88\n", - " 695000.0\n", - " 118197.278912\n", - " https://www.rightmove.co.uk/properties/115530848\n", - " 1\n", + " NaN\n", + " 400000.0\n", + " NaN\n", + " https://www.rightmove.co.uk/properties/105836849\n", + " 3\n", + " {'duration': 2565, 'distance': 14070, 'duratio...\n", + " {'duration': 2565, 'distance': 14070, 'duratio...\n", + " NaN\n", + " False\n", " \n", " \n", " 2\n", - " 64.74\n", - " 575000.0\n", - " 8881.680568\n", - " https://www.rightmove.co.uk/properties/117095606\n", - " 2\n", + " 76.91\n", + " 400000.0\n", + " 5200.884150\n", + " https://www.rightmove.co.uk/properties/107233214\n", + " 3\n", + " {'duration': 1714, 'distance': 9570, 'duration...\n", + " {'duration': 1774, 'distance': 9570, 'duration...\n", + " 91.0\n", + " False\n", " \n", " \n", " 3\n", - " 52.80\n", - " 790000.0\n", - " 14962.121212\n", - " https://www.rightmove.co.uk/properties/118752221\n", - " 1\n", + " 112.40\n", + " 800000.0\n", + " 7117.437722\n", + " https://www.rightmove.co.uk/properties/107976896\n", + " 3\n", + " {'duration': 1862, 'distance': 8278, 'duration...\n", + " {'duration': 1862, 'distance': 8278, 'duration...\n", + " NaN\n", + " False\n", " \n", " \n", " 4\n", - " 4.56\n", - " 425000.0\n", - " 93201.754386\n", - " https://www.rightmove.co.uk/properties/119578451\n", - " 2\n", + " 115.60\n", + " 775000.0\n", + " 6704.152249\n", + " https://www.rightmove.co.uk/properties/115499441\n", + " 3\n", + " {'duration': 2943, 'distance': 7437, 'duration...\n", + " {'duration': 2167, 'distance': 9920, 'duration...\n", + " NaN\n", + " False\n", " \n", " \n", " ...\n", @@ -122,80 +146,143 @@ " ...\n", " ...\n", " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", " \n", " \n", - " 1021\n", + " 2129\n", " NaN\n", - " 220000.0\n", + " 750000.0\n", " NaN\n", - " https://www.rightmove.co.uk/properties/86809926\n", - " 1\n", + " https://www.rightmove.co.uk/properties/86813508\n", + " 3\n", + " {'duration': 2400, 'distance': 13983, 'duratio...\n", + " {'duration': 2605, 'distance': 14702, 'duratio...\n", + " NaN\n", + " True\n", " \n", " \n", - " 1022\n", - " 49.00\n", - " 450000.0\n", - " 9183.673469\n", - " https://www.rightmove.co.uk/properties/86811141\n", - " 1\n", + " 2130\n", + " NaN\n", + " 655000.0\n", + " NaN\n", + " https://www.rightmove.co.uk/properties/86813523\n", + " 3\n", + " {'duration': 2400, 'distance': 13983, 'duratio...\n", + " {'duration': 2605, 'distance': 14702, 'duratio...\n", + " NaN\n", + " True\n", " \n", " \n", - " 1023\n", - " 58.20\n", + " 2131\n", + " 82.80\n", " 550000.0\n", - " 9450.171821\n", - " https://www.rightmove.co.uk/properties/86811177\n", - " 2\n", + " 6642.512077\n", + " https://www.rightmove.co.uk/properties/86814669\n", + " 3\n", + " {'duration': 2310, 'distance': 12972, 'duratio...\n", + " {'duration': 2322, 'distance': 12104, 'duratio...\n", + " 0.0\n", + " False\n", " \n", " \n", - " 1024\n", - " 3.00\n", - " 475000.0\n", - " 158333.333333\n", - " https://www.rightmove.co.uk/properties/86812494\n", - " 1\n", + " 2132\n", + " 5.52\n", + " 300000.0\n", + " 54347.826087\n", + " https://www.rightmove.co.uk/properties/86955958\n", + " 3\n", + " {'duration': 2332, 'distance': 6898, 'duration...\n", + " {'duration': 2248, 'distance': 6893, 'duration...\n", + " 105.0\n", + " False\n", " \n", " \n", - " 1025\n", + " 2133\n", " 81.60\n", " 790000.0\n", " 9681.372549\n", " https://www.rightmove.co.uk/properties/86972726\n", " 2\n", + " {'duration': 1393, 'distance': 6390, 'duration...\n", + " {'duration': 2733, 'distance': 6490, 'duration...\n", + " 993.0\n", + " False\n", " \n", " \n", "\n", - "

1026 rows × 5 columns

\n", + "

2134 rows × 9 columns

\n", "" ], "text/plain": [ " sqm_ocr price price_per_sqm \\\n", "0 7.81 350000.0 44814.340589 \n", - "1 5.88 695000.0 118197.278912 \n", - "2 64.74 575000.0 8881.680568 \n", - "3 52.80 790000.0 14962.121212 \n", - "4 4.56 425000.0 93201.754386 \n", + "1 NaN 400000.0 NaN \n", + "2 76.91 400000.0 5200.884150 \n", + "3 112.40 800000.0 7117.437722 \n", + "4 115.60 775000.0 6704.152249 \n", "... ... ... ... \n", - "1021 NaN 220000.0 NaN \n", - "1022 49.00 450000.0 9183.673469 \n", - "1023 58.20 550000.0 9450.171821 \n", - "1024 3.00 475000.0 158333.333333 \n", - "1025 81.60 790000.0 9681.372549 \n", + "2129 NaN 750000.0 NaN \n", + "2130 NaN 655000.0 NaN \n", + "2131 82.80 550000.0 6642.512077 \n", + "2132 5.52 300000.0 54347.826087 \n", + "2133 81.60 790000.0 9681.372549 \n", "\n", - " url bedrooms \n", - "0 https://www.rightmove.co.uk/properties/102360773 1 \n", - "1 https://www.rightmove.co.uk/properties/115530848 1 \n", - "2 https://www.rightmove.co.uk/properties/117095606 2 \n", - "3 https://www.rightmove.co.uk/properties/118752221 1 \n", - "4 https://www.rightmove.co.uk/properties/119578451 2 \n", - "... ... ... \n", - "1021 https://www.rightmove.co.uk/properties/86809926 1 \n", - "1022 https://www.rightmove.co.uk/properties/86811141 1 \n", - "1023 https://www.rightmove.co.uk/properties/86811177 2 \n", - "1024 https://www.rightmove.co.uk/properties/86812494 1 \n", - "1025 https://www.rightmove.co.uk/properties/86972726 2 \n", + " url bedrooms \\\n", + "0 https://www.rightmove.co.uk/properties/102360773 1 \n", + "1 https://www.rightmove.co.uk/properties/105836849 3 \n", + "2 https://www.rightmove.co.uk/properties/107233214 3 \n", + "3 https://www.rightmove.co.uk/properties/107976896 3 \n", + "4 https://www.rightmove.co.uk/properties/115499441 3 \n", + "... ... ... \n", + "2129 https://www.rightmove.co.uk/properties/86813508 3 \n", + "2130 https://www.rightmove.co.uk/properties/86813523 3 \n", + "2131 https://www.rightmove.co.uk/properties/86814669 3 \n", + "2132 https://www.rightmove.co.uk/properties/86955958 3 \n", + "2133 https://www.rightmove.co.uk/properties/86972726 2 \n", "\n", - "[1026 rows x 5 columns]" + " travel_time_fastest \\\n", + "0 {'duration': 2695, 'distance': 6467, 'duration... \n", + "1 {'duration': 2565, 'distance': 14070, 'duratio... \n", + "2 {'duration': 1714, 'distance': 9570, 'duration... \n", + "3 {'duration': 1862, 'distance': 8278, 'duration... \n", + "4 {'duration': 2943, 'distance': 7437, 'duration... \n", + "... ... \n", + "2129 {'duration': 2400, 'distance': 13983, 'duratio... \n", + "2130 {'duration': 2400, 'distance': 13983, 'duratio... \n", + "2131 {'duration': 2310, 'distance': 12972, 'duratio... \n", + "2132 {'duration': 2332, 'distance': 6898, 'duration... \n", + "2133 {'duration': 1393, 'distance': 6390, 'duration... \n", + "\n", + " travel_time_second lease_left \\\n", + "0 {'duration': 1682, 'distance': 6810, 'duration... 119.0 \n", + "1 {'duration': 2565, 'distance': 14070, 'duratio... NaN \n", + "2 {'duration': 1774, 'distance': 9570, 'duration... 91.0 \n", + "3 {'duration': 1862, 'distance': 8278, 'duration... NaN \n", + "4 {'duration': 2167, 'distance': 9920, 'duration... NaN \n", + "... ... ... \n", + "2129 {'duration': 2605, 'distance': 14702, 'duratio... NaN \n", + "2130 {'duration': 2605, 'distance': 14702, 'duratio... NaN \n", + "2131 {'duration': 2322, 'distance': 12104, 'duratio... 0.0 \n", + "2132 {'duration': 2248, 'distance': 6893, 'duration... 105.0 \n", + "2133 {'duration': 2733, 'distance': 6490, 'duration... 993.0 \n", + "\n", + " development \n", + "0 False \n", + "1 False \n", + "2 False \n", + "3 False \n", + "4 False \n", + "... ... \n", + "2129 True \n", + "2130 True \n", + "2131 False \n", + "2132 False \n", + "2133 False \n", + "\n", + "[2134 rows x 9 columns]" ] }, "execution_count": 4, @@ -208,6 +295,744 @@ "df" ] }, + { + "cell_type": "code", + "execution_count": 14, + "id": "387c48d9-26c6-4bed-8201-352735c06acb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1682.0\n", + "1 2565.0\n", + "2 1714.0\n", + "3 1862.0\n", + "4 2167.0\n", + " ... \n", + "2129 2400.0\n", + "2130 2400.0\n", + "2131 2310.0\n", + "2132 2248.0\n", + "2133 1393.0\n", + "Length: 2134, dtype: float64" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "7b37ad6b-9b0a-444e-b8c3-6fe4e43e42cb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sqm_ocrpriceprice_per_sqmurlbedroomslease_leftdevelopmenta_durationa_duration_per_transita_number_of_transit_stopsb_durationb_duration_per_transitb_number_of_transit_stopsmin_duration
07.81350000.044814.340589https://www.rightmove.co.uk/properties/1023607731119.0False2695{'WALK': 414, 'TRANSIT': 2280}11682.0{'WALK': 608, 'TRANSIT': 804}2.028.0
1NaN400000.0NaNhttps://www.rightmove.co.uk/properties/1058368493NaNFalse2565{'WALK': 750, 'TRANSIT': 1800}12565.0{'WALK': 750, 'TRANSIT': 1800}1.043.0
276.91400000.05200.884150https://www.rightmove.co.uk/properties/107233214391.0False1714{'WALK': 903, 'TRANSIT': 780}11774.0{'WALK': 903, 'TRANSIT': 840}1.029.0
3112.40800000.07117.437722https://www.rightmove.co.uk/properties/1079768963NaNFalse1862{'WALK': 635, 'TRANSIT': 900}21862.0{'WALK': 635, 'TRANSIT': 960}2.031.0
4115.60775000.06704.152249https://www.rightmove.co.uk/properties/1154994413NaNFalse2943{'WALK': 242, 'TRANSIT': 2700}12167.0{'WALK': 658, 'TRANSIT': 1200}2.036.0
.............................................
2129NaN750000.0NaNhttps://www.rightmove.co.uk/properties/868135083NaNTrue2400{'WALK': 603, 'TRANSIT': 1524}22605.0{'WALK': 1467, 'TRANSIT': 1132}1.040.0
2130NaN655000.0NaNhttps://www.rightmove.co.uk/properties/868135233NaNTrue2400{'WALK': 603, 'TRANSIT': 1524}22605.0{'WALK': 1467, 'TRANSIT': 1132}1.040.0
213182.80550000.06642.512077https://www.rightmove.co.uk/properties/8681466930.0False2310{'WALK': 786, 'TRANSIT': 1277}22322.0{'WALK': 991, 'TRANSIT': 1080}2.038.0
21325.52300000.054347.826087https://www.rightmove.co.uk/properties/869559583105.0False2332{'WALK': 1671, 'TRANSIT': 660}12248.0{'WALK': 1671, 'TRANSIT': 576}1.037.0
213381.60790000.09681.372549https://www.rightmove.co.uk/properties/869727262993.0False1393{'WALK': 518, 'TRANSIT': 564}22733.0{'WALK': 451, 'TRANSIT': 2280}1.023.0
\n", + "

2134 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " sqm_ocr price price_per_sqm \\\n", + "0 7.81 350000.0 44814.340589 \n", + "1 NaN 400000.0 NaN \n", + "2 76.91 400000.0 5200.884150 \n", + "3 112.40 800000.0 7117.437722 \n", + "4 115.60 775000.0 6704.152249 \n", + "... ... ... ... \n", + "2129 NaN 750000.0 NaN \n", + "2130 NaN 655000.0 NaN \n", + "2131 82.80 550000.0 6642.512077 \n", + "2132 5.52 300000.0 54347.826087 \n", + "2133 81.60 790000.0 9681.372549 \n", + "\n", + " url bedrooms lease_left \\\n", + "0 https://www.rightmove.co.uk/properties/102360773 1 119.0 \n", + "1 https://www.rightmove.co.uk/properties/105836849 3 NaN \n", + "2 https://www.rightmove.co.uk/properties/107233214 3 91.0 \n", + "3 https://www.rightmove.co.uk/properties/107976896 3 NaN \n", + "4 https://www.rightmove.co.uk/properties/115499441 3 NaN \n", + "... ... ... ... \n", + "2129 https://www.rightmove.co.uk/properties/86813508 3 NaN \n", + "2130 https://www.rightmove.co.uk/properties/86813523 3 NaN \n", + "2131 https://www.rightmove.co.uk/properties/86814669 3 0.0 \n", + "2132 https://www.rightmove.co.uk/properties/86955958 3 105.0 \n", + "2133 https://www.rightmove.co.uk/properties/86972726 2 993.0 \n", + "\n", + " development a_duration a_duration_per_transit \\\n", + "0 False 2695 {'WALK': 414, 'TRANSIT': 2280} \n", + "1 False 2565 {'WALK': 750, 'TRANSIT': 1800} \n", + "2 False 1714 {'WALK': 903, 'TRANSIT': 780} \n", + "3 False 1862 {'WALK': 635, 'TRANSIT': 900} \n", + "4 False 2943 {'WALK': 242, 'TRANSIT': 2700} \n", + "... ... ... ... \n", + "2129 True 2400 {'WALK': 603, 'TRANSIT': 1524} \n", + "2130 True 2400 {'WALK': 603, 'TRANSIT': 1524} \n", + "2131 False 2310 {'WALK': 786, 'TRANSIT': 1277} \n", + "2132 False 2332 {'WALK': 1671, 'TRANSIT': 660} \n", + "2133 False 1393 {'WALK': 518, 'TRANSIT': 564} \n", + "\n", + " a_number_of_transit_stops b_duration b_duration_per_transit \\\n", + "0 1 1682.0 {'WALK': 608, 'TRANSIT': 804} \n", + "1 1 2565.0 {'WALK': 750, 'TRANSIT': 1800} \n", + "2 1 1774.0 {'WALK': 903, 'TRANSIT': 840} \n", + "3 2 1862.0 {'WALK': 635, 'TRANSIT': 960} \n", + "4 1 2167.0 {'WALK': 658, 'TRANSIT': 1200} \n", + "... ... ... ... \n", + "2129 2 2605.0 {'WALK': 1467, 'TRANSIT': 1132} \n", + "2130 2 2605.0 {'WALK': 1467, 'TRANSIT': 1132} \n", + "2131 2 2322.0 {'WALK': 991, 'TRANSIT': 1080} \n", + "2132 1 2248.0 {'WALK': 1671, 'TRANSIT': 576} \n", + "2133 2 2733.0 {'WALK': 451, 'TRANSIT': 2280} \n", + "\n", + " b_number_of_transit_stops min_duration \n", + "0 2.0 28.0 \n", + "1 1.0 43.0 \n", + "2 1.0 29.0 \n", + "3 2.0 31.0 \n", + "4 2.0 36.0 \n", + "... ... ... \n", + "2129 1.0 40.0 \n", + "2130 1.0 40.0 \n", + "2131 2.0 38.0 \n", + "2132 1.0 37.0 \n", + "2133 1.0 23.0 \n", + "\n", + "[2134 rows x 14 columns]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dropcolumns = ['distance_per_transit', 'duration_static', 'distance']\n", + "s1 = df['travel_time_fastest'].apply(pd.Series).drop(dropcolumns, axis=1)\n", + "s1.columns = ['a_' + c for c in s1.columns]\n", + "\n", + "s2 = df['travel_time_second'].apply(pd.Series).drop(dropcolumns, axis=1)\n", + "s2.columns = ['b_' + c for c in s2.columns]\n", + "\n", + "df2 = pd.concat([df.drop(['travel_time_fastest', 'travel_time_second'], axis=1), s1, s2], axis=1)\n", + "df2.loc[:, 'min_duration'] = (df2.loc[:, ['a_duration', 'b_duration']].min(axis=1) / 60).round()\n", + "df2" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "8c75aaa6-6113-482f-809b-11e405510184", + "metadata": {}, + "outputs": [], + "source": [ + "df2.to_clipboard()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "79f99692-91e8-4915-9b57-7b3a1efd7d3a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sqm_ocrpriceprice_per_sqmurlbedroomsdurationdistanceduration_staticduration_per_transitdistance_per_transitnumber_of_transit_stopsdurationdistanceduration_staticduration_per_transitdistance_per_transitnumber_of_transit_stops
07.81350000.044814.340589https://www.rightmove.co.uk/properties/1023607731269564672695{'WALK': 414, 'TRANSIT': 2280}{'WALK': 465, 'TRANSIT': 6002}11682.06810.01682.0{'WALK': 608, 'TRANSIT': 804}{'WALK': 582, 'TRANSIT': 6228}2.0
1NaN400000.0NaNhttps://www.rightmove.co.uk/properties/10583684932565140702565{'WALK': 750, 'TRANSIT': 1800}{'WALK': 856, 'TRANSIT': 13214}12565.014070.02565.0{'WALK': 750, 'TRANSIT': 1800}{'WALK': 856, 'TRANSIT': 13214}1.0
276.91400000.05200.884150https://www.rightmove.co.uk/properties/1072332143171495701714{'WALK': 903, 'TRANSIT': 780}{'WALK': 1035, 'TRANSIT': 8535}11774.09570.01774.0{'WALK': 903, 'TRANSIT': 840}{'WALK': 1035, 'TRANSIT': 8535}1.0
3112.40800000.07117.437722https://www.rightmove.co.uk/properties/1079768963186282781862{'WALK': 635, 'TRANSIT': 900}{'WALK': 710, 'TRANSIT': 7568}21862.08278.01862.0{'WALK': 635, 'TRANSIT': 960}{'WALK': 710, 'TRANSIT': 7568}2.0
4115.60775000.06704.152249https://www.rightmove.co.uk/properties/1154994413294374372943{'WALK': 242, 'TRANSIT': 2700}{'WALK': 276, 'TRANSIT': 7161}12167.09920.02167.0{'WALK': 658, 'TRANSIT': 1200}{'WALK': 720, 'TRANSIT': 9200}2.0
......................................................
2046NaN750000.0NaNhttps://www.rightmove.co.uk/properties/8681350832400139832400{'WALK': 603, 'TRANSIT': 1524}{'WALK': 671, 'TRANSIT': 13312}22605.014702.02605.0{'WALK': 1467, 'TRANSIT': 1132}{'WALK': 1698, 'TRANSIT': 13004}1.0
2047NaN655000.0NaNhttps://www.rightmove.co.uk/properties/8681352332400139832400{'WALK': 603, 'TRANSIT': 1524}{'WALK': 671, 'TRANSIT': 13312}22605.014702.02605.0{'WALK': 1467, 'TRANSIT': 1132}{'WALK': 1698, 'TRANSIT': 13004}1.0
204882.80550000.06642.512077https://www.rightmove.co.uk/properties/8681466932310129722310{'WALK': 786, 'TRANSIT': 1277}{'WALK': 885, 'TRANSIT': 12087}22322.012104.02322.0{'WALK': 991, 'TRANSIT': 1080}{'WALK': 1089, 'TRANSIT': 11015}2.0
20495.52300000.054347.826087https://www.rightmove.co.uk/properties/869559583233268982332{'WALK': 1671, 'TRANSIT': 660}{'WALK': 1945, 'TRANSIT': 4953}12248.06893.02248.0{'WALK': 1671, 'TRANSIT': 576}{'WALK': 1945, 'TRANSIT': 4948}1.0
205081.60790000.09681.372549https://www.rightmove.co.uk/properties/869727262139363901393{'WALK': 518, 'TRANSIT': 564}{'WALK': 441, 'TRANSIT': 5949}22733.06490.02733.0{'WALK': 451, 'TRANSIT': 2280}{'WALK': 488, 'TRANSIT': 6002}1.0
\n", + "

2051 rows × 17 columns

\n", + "
" + ], + "text/plain": [ + " sqm_ocr price price_per_sqm \\\n", + "0 7.81 350000.0 44814.340589 \n", + "1 NaN 400000.0 NaN \n", + "2 76.91 400000.0 5200.884150 \n", + "3 112.40 800000.0 7117.437722 \n", + "4 115.60 775000.0 6704.152249 \n", + "... ... ... ... \n", + "2046 NaN 750000.0 NaN \n", + "2047 NaN 655000.0 NaN \n", + "2048 82.80 550000.0 6642.512077 \n", + "2049 5.52 300000.0 54347.826087 \n", + "2050 81.60 790000.0 9681.372549 \n", + "\n", + " url bedrooms duration \\\n", + "0 https://www.rightmove.co.uk/properties/102360773 1 2695 \n", + "1 https://www.rightmove.co.uk/properties/105836849 3 2565 \n", + "2 https://www.rightmove.co.uk/properties/107233214 3 1714 \n", + "3 https://www.rightmove.co.uk/properties/107976896 3 1862 \n", + "4 https://www.rightmove.co.uk/properties/115499441 3 2943 \n", + "... ... ... ... \n", + "2046 https://www.rightmove.co.uk/properties/86813508 3 2400 \n", + "2047 https://www.rightmove.co.uk/properties/86813523 3 2400 \n", + "2048 https://www.rightmove.co.uk/properties/86814669 3 2310 \n", + "2049 https://www.rightmove.co.uk/properties/86955958 3 2332 \n", + "2050 https://www.rightmove.co.uk/properties/86972726 2 1393 \n", + "\n", + " distance duration_static duration_per_transit \\\n", + "0 6467 2695 {'WALK': 414, 'TRANSIT': 2280} \n", + "1 14070 2565 {'WALK': 750, 'TRANSIT': 1800} \n", + "2 9570 1714 {'WALK': 903, 'TRANSIT': 780} \n", + "3 8278 1862 {'WALK': 635, 'TRANSIT': 900} \n", + "4 7437 2943 {'WALK': 242, 'TRANSIT': 2700} \n", + "... ... ... ... \n", + "2046 13983 2400 {'WALK': 603, 'TRANSIT': 1524} \n", + "2047 13983 2400 {'WALK': 603, 'TRANSIT': 1524} \n", + "2048 12972 2310 {'WALK': 786, 'TRANSIT': 1277} \n", + "2049 6898 2332 {'WALK': 1671, 'TRANSIT': 660} \n", + "2050 6390 1393 {'WALK': 518, 'TRANSIT': 564} \n", + "\n", + " distance_per_transit number_of_transit_stops duration \\\n", + "0 {'WALK': 465, 'TRANSIT': 6002} 1 1682.0 \n", + "1 {'WALK': 856, 'TRANSIT': 13214} 1 2565.0 \n", + "2 {'WALK': 1035, 'TRANSIT': 8535} 1 1774.0 \n", + "3 {'WALK': 710, 'TRANSIT': 7568} 2 1862.0 \n", + "4 {'WALK': 276, 'TRANSIT': 7161} 1 2167.0 \n", + "... ... ... ... \n", + "2046 {'WALK': 671, 'TRANSIT': 13312} 2 2605.0 \n", + "2047 {'WALK': 671, 'TRANSIT': 13312} 2 2605.0 \n", + "2048 {'WALK': 885, 'TRANSIT': 12087} 2 2322.0 \n", + "2049 {'WALK': 1945, 'TRANSIT': 4953} 1 2248.0 \n", + "2050 {'WALK': 441, 'TRANSIT': 5949} 2 2733.0 \n", + "\n", + " distance duration_static duration_per_transit \\\n", + "0 6810.0 1682.0 {'WALK': 608, 'TRANSIT': 804} \n", + "1 14070.0 2565.0 {'WALK': 750, 'TRANSIT': 1800} \n", + "2 9570.0 1774.0 {'WALK': 903, 'TRANSIT': 840} \n", + "3 8278.0 1862.0 {'WALK': 635, 'TRANSIT': 960} \n", + "4 9920.0 2167.0 {'WALK': 658, 'TRANSIT': 1200} \n", + "... ... ... ... \n", + "2046 14702.0 2605.0 {'WALK': 1467, 'TRANSIT': 1132} \n", + "2047 14702.0 2605.0 {'WALK': 1467, 'TRANSIT': 1132} \n", + "2048 12104.0 2322.0 {'WALK': 991, 'TRANSIT': 1080} \n", + "2049 6893.0 2248.0 {'WALK': 1671, 'TRANSIT': 576} \n", + "2050 6490.0 2733.0 {'WALK': 451, 'TRANSIT': 2280} \n", + "\n", + " distance_per_transit number_of_transit_stops \n", + "0 {'WALK': 582, 'TRANSIT': 6228} 2.0 \n", + "1 {'WALK': 856, 'TRANSIT': 13214} 1.0 \n", + "2 {'WALK': 1035, 'TRANSIT': 8535} 1.0 \n", + "3 {'WALK': 710, 'TRANSIT': 7568} 2.0 \n", + "4 {'WALK': 720, 'TRANSIT': 9200} 2.0 \n", + "... ... ... \n", + "2046 {'WALK': 1698, 'TRANSIT': 13004} 1.0 \n", + "2047 {'WALK': 1698, 'TRANSIT': 13004} 1.0 \n", + "2048 {'WALK': 1089, 'TRANSIT': 11015} 2.0 \n", + "2049 {'WALK': 1945, 'TRANSIT': 4948} 1.0 \n", + "2050 {'WALK': 488, 'TRANSIT': 6002} 1.0 \n", + "\n", + "[2051 rows x 17 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.concat([df.drop(['travel_time_fastest'], axis=1), df['travel_time_fastest'].apply(pd.Series)], axis=1)\n", + "df = pd.concat([df.drop(['travel_time_second'], axis=1), df['travel_time_second'].apply(pd.Series)], axis=1)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "abcbde40-7432-4449-957a-79ce2ca126eb", + "metadata": {}, + "outputs": [], + "source": [ + "\n" + ] + }, { "cell_type": "code", "execution_count": 5,