From ce632c795d1b5a73a5598d4892f319f539fdb45f Mon Sep 17 00:00:00 2001 From: Kadir Date: Mon, 25 Mar 2024 20:47:15 +0000 Subject: [PATCH] fixing gitignores --- .gitignore | 1 + crawler/.gitignore | 1 - .../exploration-checkpoint.ipynb | 1714 ----------------- .../pyproject-checkpoint.toml | 25 - crawler/data/.gitignore | 2 + 5 files changed, 3 insertions(+), 1740 deletions(-) delete mode 100644 crawler/.gitignore delete mode 100644 crawler/.ipynb_checkpoints/exploration-checkpoint.ipynb delete mode 100644 crawler/.ipynb_checkpoints/pyproject-checkpoint.toml create mode 100644 crawler/data/.gitignore diff --git a/.gitignore b/.gitignore index c665ac1..310501d 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ __pycache__/ sqlite.db .idea/ .DS_Store +.ipynb_checkpoints/ diff --git a/crawler/.gitignore b/crawler/.gitignore deleted file mode 100644 index 10e00f3..0000000 --- a/crawler/.gitignore +++ /dev/null @@ -1 +0,0 @@ -data/rs diff --git a/crawler/.ipynb_checkpoints/exploration-checkpoint.ipynb b/crawler/.ipynb_checkpoints/exploration-checkpoint.ipynb deleted file mode 100644 index 480a582..0000000 --- a/crawler/.ipynb_checkpoints/exploration-checkpoint.ipynb +++ /dev/null @@ -1,1714 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "f20bddee-1e7c-4c46-a17a-c7bb6c13f30c", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/kadir/code/realestate/crawler/venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], - "source": [ - "from data_access import Listing\n", - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "b1101088-9613-465f-81fd-79801e0202b8", - "metadata": {}, - "outputs": [], - "source": [ - "ls = Listing.get_all_listings()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "63e61601-7e3f-4d58-89f6-1794e4868cc3", - "metadata": {}, - "outputs": [], - "source": [ - "ds = [l.dict_nicely() for l in ls]" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "1c222721-f426-42c0-9ac5-badc1f7a2034", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sqm_ocrpriceprice_per_sqmurlbedroomstravel_time_fastesttravel_time_secondlease_leftdevelopment
07.81350000.044814.340589https://www.rightmove.co.uk/properties/1023607731{'duration': 2695, 'distance': 6467, 'duration...{'duration': 1682, 'distance': 6810, 'duration...119.0False
1NaN400000.0NaNhttps://www.rightmove.co.uk/properties/1058368493{'duration': 2565, 'distance': 14070, 'duratio...{'duration': 2565, 'distance': 14070, 'duratio...NaNFalse
276.91400000.05200.884150https://www.rightmove.co.uk/properties/1072332143{'duration': 1714, 'distance': 9570, 'duration...{'duration': 1774, 'distance': 9570, 'duration...91.0False
3112.40800000.07117.437722https://www.rightmove.co.uk/properties/1079768963{'duration': 1862, 'distance': 8278, 'duration...{'duration': 1862, 'distance': 8278, 'duration...NaNFalse
4115.60775000.06704.152249https://www.rightmove.co.uk/properties/1154994413{'duration': 2943, 'distance': 7437, 'duration...{'duration': 2167, 'distance': 9920, 'duration...NaNFalse
..............................
2129NaN750000.0NaNhttps://www.rightmove.co.uk/properties/868135083{'duration': 2400, 'distance': 13983, 'duratio...{'duration': 2605, 'distance': 14702, 'duratio...NaNTrue
2130NaN655000.0NaNhttps://www.rightmove.co.uk/properties/868135233{'duration': 2400, 'distance': 13983, 'duratio...{'duration': 2605, 'distance': 14702, 'duratio...NaNTrue
213182.80550000.06642.512077https://www.rightmove.co.uk/properties/868146693{'duration': 2310, 'distance': 12972, 'duratio...{'duration': 2322, 'distance': 12104, 'duratio...0.0False
21325.52300000.054347.826087https://www.rightmove.co.uk/properties/869559583{'duration': 2332, 'distance': 6898, 'duration...{'duration': 2248, 'distance': 6893, 'duration...105.0False
213381.60790000.09681.372549https://www.rightmove.co.uk/properties/869727262{'duration': 1393, 'distance': 6390, 'duration...{'duration': 2733, 'distance': 6490, 'duration...993.0False
\n", - "

2134 rows × 9 columns

\n", - "
" - ], - "text/plain": [ - " sqm_ocr price price_per_sqm \\\n", - "0 7.81 350000.0 44814.340589 \n", - "1 NaN 400000.0 NaN \n", - "2 76.91 400000.0 5200.884150 \n", - "3 112.40 800000.0 7117.437722 \n", - "4 115.60 775000.0 6704.152249 \n", - "... ... ... ... \n", - "2129 NaN 750000.0 NaN \n", - "2130 NaN 655000.0 NaN \n", - "2131 82.80 550000.0 6642.512077 \n", - "2132 5.52 300000.0 54347.826087 \n", - "2133 81.60 790000.0 9681.372549 \n", - "\n", - " url bedrooms \\\n", - "0 https://www.rightmove.co.uk/properties/102360773 1 \n", - "1 https://www.rightmove.co.uk/properties/105836849 3 \n", - "2 https://www.rightmove.co.uk/properties/107233214 3 \n", - "3 https://www.rightmove.co.uk/properties/107976896 3 \n", - "4 https://www.rightmove.co.uk/properties/115499441 3 \n", - "... ... ... \n", - "2129 https://www.rightmove.co.uk/properties/86813508 3 \n", - "2130 https://www.rightmove.co.uk/properties/86813523 3 \n", - "2131 https://www.rightmove.co.uk/properties/86814669 3 \n", - "2132 https://www.rightmove.co.uk/properties/86955958 3 \n", - "2133 https://www.rightmove.co.uk/properties/86972726 2 \n", - "\n", - " travel_time_fastest \\\n", - "0 {'duration': 2695, 'distance': 6467, 'duration... \n", - "1 {'duration': 2565, 'distance': 14070, 'duratio... \n", - "2 {'duration': 1714, 'distance': 9570, 'duration... \n", - "3 {'duration': 1862, 'distance': 8278, 'duration... \n", - "4 {'duration': 2943, 'distance': 7437, 'duration... \n", - "... ... \n", - "2129 {'duration': 2400, 'distance': 13983, 'duratio... \n", - "2130 {'duration': 2400, 'distance': 13983, 'duratio... \n", - "2131 {'duration': 2310, 'distance': 12972, 'duratio... \n", - "2132 {'duration': 2332, 'distance': 6898, 'duration... \n", - "2133 {'duration': 1393, 'distance': 6390, 'duration... \n", - "\n", - " travel_time_second lease_left \\\n", - "0 {'duration': 1682, 'distance': 6810, 'duration... 119.0 \n", - "1 {'duration': 2565, 'distance': 14070, 'duratio... NaN \n", - "2 {'duration': 1774, 'distance': 9570, 'duration... 91.0 \n", - "3 {'duration': 1862, 'distance': 8278, 'duration... NaN \n", - "4 {'duration': 2167, 'distance': 9920, 'duration... NaN \n", - "... ... ... \n", - "2129 {'duration': 2605, 'distance': 14702, 'duratio... NaN \n", - "2130 {'duration': 2605, 'distance': 14702, 'duratio... NaN \n", - "2131 {'duration': 2322, 'distance': 12104, 'duratio... 0.0 \n", - "2132 {'duration': 2248, 'distance': 6893, 'duration... 105.0 \n", - "2133 {'duration': 2733, 'distance': 6490, 'duration... 993.0 \n", - "\n", - " development \n", - "0 False \n", - "1 False \n", - "2 False \n", - "3 False \n", - "4 False \n", - "... ... \n", - "2129 True \n", - "2130 True \n", - "2131 False \n", - "2132 False \n", - "2133 False \n", - "\n", - "[2134 rows x 9 columns]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = pd.DataFrame(ds)\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "387c48d9-26c6-4bed-8201-352735c06acb", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 1682.0\n", - "1 2565.0\n", - "2 1714.0\n", - "3 1862.0\n", - "4 2167.0\n", - " ... \n", - "2129 2400.0\n", - "2130 2400.0\n", - "2131 2310.0\n", - "2132 2248.0\n", - "2133 1393.0\n", - "Length: 2134, dtype: float64" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "7b37ad6b-9b0a-444e-b8c3-6fe4e43e42cb", - "metadata": {}, - "outputs": [ - { - "ename": "ValueError", - "evalue": "The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m/var/folders/wl/kx43lvyn6yv7lq988gwrkq_m0000gn/T/ipykernel_33778/1787981707.py\u001b[0m in \u001b[0;36m?\u001b[0;34m()\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0ms2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'travel_time_second'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSeries\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdropcolumns\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0ms2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'b_'\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mc\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mc\u001b[0m \u001b[0;32min\u001b[0m \u001b[0ms2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mdf2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'travel_time_fastest'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'travel_time_second'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0ms1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0ms2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0mdf2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'min_duration'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0ma_duration\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdf2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mb_duration\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0mdf2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/code/realestate/crawler/venv/lib/python3.12/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1574\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mfinal\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1575\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__nonzero__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mNoReturn\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1576\u001b[0;31m raise ValueError(\n\u001b[0m\u001b[1;32m 1577\u001b[0m \u001b[0;34mf\"\u001b[0m\u001b[0;34mThe truth value of a \u001b[0m\u001b[0;34m{\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m is ambiguous. \u001b[0m\u001b[0;34m\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1578\u001b[0m \u001b[0;34m\"Use a.empty, a.bool(), a.item(), a.any() or a.all().\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1579\u001b[0m \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all()." - ] - } - ], - "source": [ - "dropcolumns = ['distance_per_transit', 'duration_static', 'distance']\n", - "s1 = df['travel_time_fastest'].apply(pd.Series).drop(dropcolumns, axis=1)\n", - "s1.columns = ['a_' + c for c in s1.columns]\n", - "\n", - "s2 = df['travel_time_second'].apply(pd.Series).drop(dropcolumns, axis=1)\n", - "s2.columns = ['b_' + c for c in s2.columns]\n", - "\n", - "df2 = pd.concat([df.drop(['travel_time_fastest', 'travel_time_second'], axis=1), s1, s2], axis=1)\n", - "df2.loc[:, 'min_duration'] = df2.loc[:, ['a_duration', 'b_duration']].min(axis=1)\n", - "df2" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "8c75aaa6-6113-482f-809b-11e405510184", - "metadata": {}, - "outputs": [], - "source": [ - "df2.to_clipboard()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "79f99692-91e8-4915-9b57-7b3a1efd7d3a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sqm_ocrpriceprice_per_sqmurlbedroomsdurationdistanceduration_staticduration_per_transitdistance_per_transitnumber_of_transit_stopsdurationdistanceduration_staticduration_per_transitdistance_per_transitnumber_of_transit_stops
07.81350000.044814.340589https://www.rightmove.co.uk/properties/1023607731269564672695{'WALK': 414, 'TRANSIT': 2280}{'WALK': 465, 'TRANSIT': 6002}11682.06810.01682.0{'WALK': 608, 'TRANSIT': 804}{'WALK': 582, 'TRANSIT': 6228}2.0
1NaN400000.0NaNhttps://www.rightmove.co.uk/properties/10583684932565140702565{'WALK': 750, 'TRANSIT': 1800}{'WALK': 856, 'TRANSIT': 13214}12565.014070.02565.0{'WALK': 750, 'TRANSIT': 1800}{'WALK': 856, 'TRANSIT': 13214}1.0
276.91400000.05200.884150https://www.rightmove.co.uk/properties/1072332143171495701714{'WALK': 903, 'TRANSIT': 780}{'WALK': 1035, 'TRANSIT': 8535}11774.09570.01774.0{'WALK': 903, 'TRANSIT': 840}{'WALK': 1035, 'TRANSIT': 8535}1.0
3112.40800000.07117.437722https://www.rightmove.co.uk/properties/1079768963186282781862{'WALK': 635, 'TRANSIT': 900}{'WALK': 710, 'TRANSIT': 7568}21862.08278.01862.0{'WALK': 635, 'TRANSIT': 960}{'WALK': 710, 'TRANSIT': 7568}2.0
4115.60775000.06704.152249https://www.rightmove.co.uk/properties/1154994413294374372943{'WALK': 242, 'TRANSIT': 2700}{'WALK': 276, 'TRANSIT': 7161}12167.09920.02167.0{'WALK': 658, 'TRANSIT': 1200}{'WALK': 720, 'TRANSIT': 9200}2.0
......................................................
2046NaN750000.0NaNhttps://www.rightmove.co.uk/properties/8681350832400139832400{'WALK': 603, 'TRANSIT': 1524}{'WALK': 671, 'TRANSIT': 13312}22605.014702.02605.0{'WALK': 1467, 'TRANSIT': 1132}{'WALK': 1698, 'TRANSIT': 13004}1.0
2047NaN655000.0NaNhttps://www.rightmove.co.uk/properties/8681352332400139832400{'WALK': 603, 'TRANSIT': 1524}{'WALK': 671, 'TRANSIT': 13312}22605.014702.02605.0{'WALK': 1467, 'TRANSIT': 1132}{'WALK': 1698, 'TRANSIT': 13004}1.0
204882.80550000.06642.512077https://www.rightmove.co.uk/properties/8681466932310129722310{'WALK': 786, 'TRANSIT': 1277}{'WALK': 885, 'TRANSIT': 12087}22322.012104.02322.0{'WALK': 991, 'TRANSIT': 1080}{'WALK': 1089, 'TRANSIT': 11015}2.0
20495.52300000.054347.826087https://www.rightmove.co.uk/properties/869559583233268982332{'WALK': 1671, 'TRANSIT': 660}{'WALK': 1945, 'TRANSIT': 4953}12248.06893.02248.0{'WALK': 1671, 'TRANSIT': 576}{'WALK': 1945, 'TRANSIT': 4948}1.0
205081.60790000.09681.372549https://www.rightmove.co.uk/properties/869727262139363901393{'WALK': 518, 'TRANSIT': 564}{'WALK': 441, 'TRANSIT': 5949}22733.06490.02733.0{'WALK': 451, 'TRANSIT': 2280}{'WALK': 488, 'TRANSIT': 6002}1.0
\n", - "

2051 rows × 17 columns

\n", - "
" - ], - "text/plain": [ - " sqm_ocr price price_per_sqm \\\n", - "0 7.81 350000.0 44814.340589 \n", - "1 NaN 400000.0 NaN \n", - "2 76.91 400000.0 5200.884150 \n", - "3 112.40 800000.0 7117.437722 \n", - "4 115.60 775000.0 6704.152249 \n", - "... ... ... ... \n", - "2046 NaN 750000.0 NaN \n", - "2047 NaN 655000.0 NaN \n", - "2048 82.80 550000.0 6642.512077 \n", - "2049 5.52 300000.0 54347.826087 \n", - "2050 81.60 790000.0 9681.372549 \n", - "\n", - " url bedrooms duration \\\n", - "0 https://www.rightmove.co.uk/properties/102360773 1 2695 \n", - "1 https://www.rightmove.co.uk/properties/105836849 3 2565 \n", - "2 https://www.rightmove.co.uk/properties/107233214 3 1714 \n", - "3 https://www.rightmove.co.uk/properties/107976896 3 1862 \n", - "4 https://www.rightmove.co.uk/properties/115499441 3 2943 \n", - "... ... ... ... \n", - "2046 https://www.rightmove.co.uk/properties/86813508 3 2400 \n", - "2047 https://www.rightmove.co.uk/properties/86813523 3 2400 \n", - "2048 https://www.rightmove.co.uk/properties/86814669 3 2310 \n", - "2049 https://www.rightmove.co.uk/properties/86955958 3 2332 \n", - "2050 https://www.rightmove.co.uk/properties/86972726 2 1393 \n", - "\n", - " distance duration_static duration_per_transit \\\n", - "0 6467 2695 {'WALK': 414, 'TRANSIT': 2280} \n", - "1 14070 2565 {'WALK': 750, 'TRANSIT': 1800} \n", - "2 9570 1714 {'WALK': 903, 'TRANSIT': 780} \n", - "3 8278 1862 {'WALK': 635, 'TRANSIT': 900} \n", - "4 7437 2943 {'WALK': 242, 'TRANSIT': 2700} \n", - "... ... ... ... \n", - "2046 13983 2400 {'WALK': 603, 'TRANSIT': 1524} \n", - "2047 13983 2400 {'WALK': 603, 'TRANSIT': 1524} \n", - "2048 12972 2310 {'WALK': 786, 'TRANSIT': 1277} \n", - "2049 6898 2332 {'WALK': 1671, 'TRANSIT': 660} \n", - "2050 6390 1393 {'WALK': 518, 'TRANSIT': 564} \n", - "\n", - " distance_per_transit number_of_transit_stops duration \\\n", - "0 {'WALK': 465, 'TRANSIT': 6002} 1 1682.0 \n", - "1 {'WALK': 856, 'TRANSIT': 13214} 1 2565.0 \n", - "2 {'WALK': 1035, 'TRANSIT': 8535} 1 1774.0 \n", - "3 {'WALK': 710, 'TRANSIT': 7568} 2 1862.0 \n", - "4 {'WALK': 276, 'TRANSIT': 7161} 1 2167.0 \n", - "... ... ... ... \n", - "2046 {'WALK': 671, 'TRANSIT': 13312} 2 2605.0 \n", - "2047 {'WALK': 671, 'TRANSIT': 13312} 2 2605.0 \n", - "2048 {'WALK': 885, 'TRANSIT': 12087} 2 2322.0 \n", - "2049 {'WALK': 1945, 'TRANSIT': 4953} 1 2248.0 \n", - "2050 {'WALK': 441, 'TRANSIT': 5949} 2 2733.0 \n", - "\n", - " distance duration_static duration_per_transit \\\n", - "0 6810.0 1682.0 {'WALK': 608, 'TRANSIT': 804} \n", - "1 14070.0 2565.0 {'WALK': 750, 'TRANSIT': 1800} \n", - "2 9570.0 1774.0 {'WALK': 903, 'TRANSIT': 840} \n", - "3 8278.0 1862.0 {'WALK': 635, 'TRANSIT': 960} \n", - "4 9920.0 2167.0 {'WALK': 658, 'TRANSIT': 1200} \n", - "... ... ... ... \n", - "2046 14702.0 2605.0 {'WALK': 1467, 'TRANSIT': 1132} \n", - "2047 14702.0 2605.0 {'WALK': 1467, 'TRANSIT': 1132} \n", - "2048 12104.0 2322.0 {'WALK': 991, 'TRANSIT': 1080} \n", - "2049 6893.0 2248.0 {'WALK': 1671, 'TRANSIT': 576} \n", - "2050 6490.0 2733.0 {'WALK': 451, 'TRANSIT': 2280} \n", - "\n", - " distance_per_transit number_of_transit_stops \n", - "0 {'WALK': 582, 'TRANSIT': 6228} 2.0 \n", - "1 {'WALK': 856, 'TRANSIT': 13214} 1.0 \n", - "2 {'WALK': 1035, 'TRANSIT': 8535} 1.0 \n", - "3 {'WALK': 710, 'TRANSIT': 7568} 2.0 \n", - "4 {'WALK': 720, 'TRANSIT': 9200} 2.0 \n", - "... ... ... \n", - "2046 {'WALK': 1698, 'TRANSIT': 13004} 1.0 \n", - "2047 {'WALK': 1698, 'TRANSIT': 13004} 1.0 \n", - "2048 {'WALK': 1089, 'TRANSIT': 11015} 2.0 \n", - "2049 {'WALK': 1945, 'TRANSIT': 4948} 1.0 \n", - "2050 {'WALK': 488, 'TRANSIT': 6002} 1.0 \n", - "\n", - "[2051 rows x 17 columns]" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = pd.concat([df.drop(['travel_time_fastest'], axis=1), df['travel_time_fastest'].apply(pd.Series)], axis=1)\n", - "df = pd.concat([df.drop(['travel_time_second'], axis=1), df['travel_time_second'].apply(pd.Series)], axis=1)\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "abcbde40-7432-4449-957a-79ce2ca126eb", - "metadata": {}, - "outputs": [], - "source": [ - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "99c5b304-3d13-466b-a9f5-83a5db6311b5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sqm_ocrpriceprice_per_sqmurlbedrooms
95374142.40550000.07.418157https://www.rightmove.co.uk/properties/1455465382
24921850.10725000.033.180626https://www.rightmove.co.uk/properties/1408217362
177137.50695000.05054.545455https://www.rightmove.co.uk/properties/1393542592
35285.50475000.05555.555556https://www.rightmove.co.uk/properties/1421423482
3682.90475000.05729.794934https://www.rightmove.co.uk/properties/1289259502
700101.00650000.06435.643564https://www.rightmove.co.uk/properties/1445915721
82380.40525000.06529.850746https://www.rightmove.co.uk/properties/1450517692
21491.51650000.07103.048847https://www.rightmove.co.uk/properties/1403263072
17191.10650000.07135.016465https://www.rightmove.co.uk/properties/1392454282
598109.40795000.07266.910420https://www.rightmove.co.uk/properties/1440346552
14693.78700000.07464.278098https://www.rightmove.co.uk/properties/1385103952
711100.31750000.07476.821852https://www.rightmove.co.uk/properties/1446203032
59286.49650000.07515.319690https://www.rightmove.co.uk/properties/1439876692
624101.35775000.07646.768624https://www.rightmove.co.uk/properties/1442179221
5597.60750000.07684.426230https://www.rightmove.co.uk/properties/1325647372
85181.01625000.07715.096902https://www.rightmove.co.uk/properties/1451725042
35689.00695000.07808.988764https://www.rightmove.co.uk/properties/1421856232
74885.90675000.07857.974389https://www.rightmove.co.uk/properties/1447712812
20482.40650000.07888.349515https://www.rightmove.co.uk/properties/1401733192
65483.33665000.07980.319213https://www.rightmove.co.uk/properties/1443611002
51487.50700000.08000.000000https://www.rightmove.co.uk/properties/1434603651
76292.90750000.08073.196986https://www.rightmove.co.uk/properties/1448467251
96392.90750000.08073.196986https://www.rightmove.co.uk/properties/1455652522
93886.10699000.08118.466899https://www.rightmove.co.uk/properties/1454911372
40390.60750000.08278.145695https://www.rightmove.co.uk/properties/1427044162
30293.30795000.08520.900322https://www.rightmove.co.uk/properties/1415954332
67193.27800000.08577.248847https://www.rightmove.co.uk/properties/1444291402
34980.57695000.08626.039469https://www.rightmove.co.uk/properties/1421159182
22490.10800000.08879.023307https://www.rightmove.co.uk/properties/1404644812
74081.70735000.08996.328029https://www.rightmove.co.uk/properties/1447224142
32687.30800000.09163.802978https://www.rightmove.co.uk/properties/1418460232
31981.57750000.09194.556822https://www.rightmove.co.uk/properties/1417973572
55886.77800000.09219.776420https://www.rightmove.co.uk/properties/1437587632
71286.30800000.09269.988413https://www.rightmove.co.uk/properties/1446221572
47386.00800000.09302.325581https://www.rightmove.co.uk/properties/1432101022
10580.40750000.09328.358209https://www.rightmove.co.uk/properties/1369887262
100380.27750000.09343.465803https://www.rightmove.co.uk/properties/867752912
23582.80775000.09359.903382https://www.rightmove.co.uk/properties/1406110552
6585.10800000.09400.705053https://www.rightmove.co.uk/properties/1341162322
3083.70795000.09498.207885https://www.rightmove.co.uk/properties/1277879601
102581.60790000.09681.372549https://www.rightmove.co.uk/properties/869727262
8881.75800000.09785.932722https://www.rightmove.co.uk/properties/1360121931
45480.80800000.09900.990099https://www.rightmove.co.uk/properties/1431388672
34380.64800000.09920.634921https://www.rightmove.co.uk/properties/1420329352
\n", - "
" - ], - "text/plain": [ - " sqm_ocr price price_per_sqm \\\n", - "953 74142.40 550000.0 7.418157 \n", - "249 21850.10 725000.0 33.180626 \n", - "177 137.50 695000.0 5054.545455 \n", - "352 85.50 475000.0 5555.555556 \n", - "36 82.90 475000.0 5729.794934 \n", - "700 101.00 650000.0 6435.643564 \n", - "823 80.40 525000.0 6529.850746 \n", - "214 91.51 650000.0 7103.048847 \n", - "171 91.10 650000.0 7135.016465 \n", - "598 109.40 795000.0 7266.910420 \n", - "146 93.78 700000.0 7464.278098 \n", - "711 100.31 750000.0 7476.821852 \n", - "592 86.49 650000.0 7515.319690 \n", - "624 101.35 775000.0 7646.768624 \n", - "55 97.60 750000.0 7684.426230 \n", - "851 81.01 625000.0 7715.096902 \n", - "356 89.00 695000.0 7808.988764 \n", - "748 85.90 675000.0 7857.974389 \n", - "204 82.40 650000.0 7888.349515 \n", - "654 83.33 665000.0 7980.319213 \n", - "514 87.50 700000.0 8000.000000 \n", - "762 92.90 750000.0 8073.196986 \n", - "963 92.90 750000.0 8073.196986 \n", - "938 86.10 699000.0 8118.466899 \n", - "403 90.60 750000.0 8278.145695 \n", - "302 93.30 795000.0 8520.900322 \n", - "671 93.27 800000.0 8577.248847 \n", - "349 80.57 695000.0 8626.039469 \n", - "224 90.10 800000.0 8879.023307 \n", - "740 81.70 735000.0 8996.328029 \n", - "326 87.30 800000.0 9163.802978 \n", - "319 81.57 750000.0 9194.556822 \n", - "558 86.77 800000.0 9219.776420 \n", - "712 86.30 800000.0 9269.988413 \n", - "473 86.00 800000.0 9302.325581 \n", - "105 80.40 750000.0 9328.358209 \n", - "1003 80.27 750000.0 9343.465803 \n", - "235 82.80 775000.0 9359.903382 \n", - "65 85.10 800000.0 9400.705053 \n", - "30 83.70 795000.0 9498.207885 \n", - "1025 81.60 790000.0 9681.372549 \n", - "88 81.75 800000.0 9785.932722 \n", - "454 80.80 800000.0 9900.990099 \n", - "343 80.64 800000.0 9920.634921 \n", - "\n", - " url bedrooms \n", - "953 https://www.rightmove.co.uk/properties/145546538 2 \n", - "249 https://www.rightmove.co.uk/properties/140821736 2 \n", - "177 https://www.rightmove.co.uk/properties/139354259 2 \n", - "352 https://www.rightmove.co.uk/properties/142142348 2 \n", - "36 https://www.rightmove.co.uk/properties/128925950 2 \n", - "700 https://www.rightmove.co.uk/properties/144591572 1 \n", - "823 https://www.rightmove.co.uk/properties/145051769 2 \n", - "214 https://www.rightmove.co.uk/properties/140326307 2 \n", - "171 https://www.rightmove.co.uk/properties/139245428 2 \n", - "598 https://www.rightmove.co.uk/properties/144034655 2 \n", - "146 https://www.rightmove.co.uk/properties/138510395 2 \n", - "711 https://www.rightmove.co.uk/properties/144620303 2 \n", - "592 https://www.rightmove.co.uk/properties/143987669 2 \n", - "624 https://www.rightmove.co.uk/properties/144217922 1 \n", - "55 https://www.rightmove.co.uk/properties/132564737 2 \n", - "851 https://www.rightmove.co.uk/properties/145172504 2 \n", - "356 https://www.rightmove.co.uk/properties/142185623 2 \n", - "748 https://www.rightmove.co.uk/properties/144771281 2 \n", - "204 https://www.rightmove.co.uk/properties/140173319 2 \n", - "654 https://www.rightmove.co.uk/properties/144361100 2 \n", - "514 https://www.rightmove.co.uk/properties/143460365 1 \n", - "762 https://www.rightmove.co.uk/properties/144846725 1 \n", - "963 https://www.rightmove.co.uk/properties/145565252 2 \n", - "938 https://www.rightmove.co.uk/properties/145491137 2 \n", - "403 https://www.rightmove.co.uk/properties/142704416 2 \n", - "302 https://www.rightmove.co.uk/properties/141595433 2 \n", - "671 https://www.rightmove.co.uk/properties/144429140 2 \n", - "349 https://www.rightmove.co.uk/properties/142115918 2 \n", - "224 https://www.rightmove.co.uk/properties/140464481 2 \n", - "740 https://www.rightmove.co.uk/properties/144722414 2 \n", - "326 https://www.rightmove.co.uk/properties/141846023 2 \n", - "319 https://www.rightmove.co.uk/properties/141797357 2 \n", - "558 https://www.rightmove.co.uk/properties/143758763 2 \n", - "712 https://www.rightmove.co.uk/properties/144622157 2 \n", - "473 https://www.rightmove.co.uk/properties/143210102 2 \n", - "105 https://www.rightmove.co.uk/properties/136988726 2 \n", - "1003 https://www.rightmove.co.uk/properties/86775291 2 \n", - "235 https://www.rightmove.co.uk/properties/140611055 2 \n", - "65 https://www.rightmove.co.uk/properties/134116232 2 \n", - "30 https://www.rightmove.co.uk/properties/127787960 1 \n", - "1025 https://www.rightmove.co.uk/properties/86972726 2 \n", - "88 https://www.rightmove.co.uk/properties/136012193 1 \n", - "454 https://www.rightmove.co.uk/properties/143138867 2 \n", - "343 https://www.rightmove.co.uk/properties/142032935 2 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df2 = df[df.sqm_ocr > 80]\n", - "df2.sort_values('price_per_sqm')" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "52545cfa-0932-46fe-ba7e-961fd43f2786", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sqm_ocrpriceprice_per_sqmurlbedrooms
35973.40400000.05449.591281https://www.rightmove.co.uk/properties/1421869912
29377.00425000.05519.480519https://www.rightmove.co.uk/properties/1414377832
35285.50475000.05555.555556https://www.rightmove.co.uk/properties/1421423482
68576.03425000.05589.898724https://www.rightmove.co.uk/properties/1444940122
3682.90475000.05729.794934https://www.rightmove.co.uk/properties/1289259502
49171.68450000.06277.901786https://www.rightmove.co.uk/properties/1433158402
102073.67495000.06719.152980https://www.rightmove.co.uk/properties/868079162
\n", - "
" - ], - "text/plain": [ - " sqm_ocr price price_per_sqm \\\n", - "359 73.40 400000.0 5449.591281 \n", - "293 77.00 425000.0 5519.480519 \n", - "352 85.50 475000.0 5555.555556 \n", - "685 76.03 425000.0 5589.898724 \n", - "36 82.90 475000.0 5729.794934 \n", - "491 71.68 450000.0 6277.901786 \n", - "1020 73.67 495000.0 6719.152980 \n", - "\n", - " url bedrooms \n", - "359 https://www.rightmove.co.uk/properties/142186991 2 \n", - "293 https://www.rightmove.co.uk/properties/141437783 2 \n", - "352 https://www.rightmove.co.uk/properties/142142348 2 \n", - "685 https://www.rightmove.co.uk/properties/144494012 2 \n", - "36 https://www.rightmove.co.uk/properties/128925950 2 \n", - "491 https://www.rightmove.co.uk/properties/143315840 2 \n", - "1020 https://www.rightmove.co.uk/properties/86807916 2 " - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df2 = df[(df.sqm_ocr > 70) & (df.price <500000)]\n", - "df2.sort_values('price_per_sqm')" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "d0246926-13ef-4110-8e3a-fb676a55c2a6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sqm_ocrpriceprice_per_sqmurlbedrooms
95374142.40550000.07.418157https://www.rightmove.co.uk/properties/1455465382
82380.40525000.06529.850746https://www.rightmove.co.uk/properties/1450517692
49273.90525000.07104.194858https://www.rightmove.co.uk/properties/1433173612
56176.70550000.07170.795306https://www.rightmove.co.uk/properties/1437807892
6973.10525000.07181.942544https://www.rightmove.co.uk/properties/1345745632
98872.40525000.07251.381215https://www.rightmove.co.uk/properties/866489252
27277.89575000.07382.205675https://www.rightmove.co.uk/properties/1411312972
52170.88525000.07406.884876https://www.rightmove.co.uk/properties/1435141492
32476.10575000.07555.847569https://www.rightmove.co.uk/properties/1418313532
101871.80550000.07660.167131https://www.rightmove.co.uk/properties/868048322
52678.00600000.07692.307692https://www.rightmove.co.uk/properties/1435521561
81771.17550000.07727.975270https://www.rightmove.co.uk/properties/1450359292
84377.50600000.07741.935484https://www.rightmove.co.uk/properties/1451449882
33770.60550000.07790.368272https://www.rightmove.co.uk/properties/1419042862
23375.70600000.07926.023778https://www.rightmove.co.uk/properties/1405822132
76375.00600000.08000.000000https://www.rightmove.co.uk/properties/1448620702
31571.30590000.08274.894811https://www.rightmove.co.uk/properties/1416696862
89971.47595000.08325.171401https://www.rightmove.co.uk/properties/1453629112
14771.50600000.08391.608392https://www.rightmove.co.uk/properties/1385375271
97370.89595000.08393.285372https://www.rightmove.co.uk/properties/862964912
\n", - "
" - ], - "text/plain": [ - " sqm_ocr price price_per_sqm \\\n", - "953 74142.40 550000.0 7.418157 \n", - "823 80.40 525000.0 6529.850746 \n", - "492 73.90 525000.0 7104.194858 \n", - "561 76.70 550000.0 7170.795306 \n", - "69 73.10 525000.0 7181.942544 \n", - "988 72.40 525000.0 7251.381215 \n", - "272 77.89 575000.0 7382.205675 \n", - "521 70.88 525000.0 7406.884876 \n", - "324 76.10 575000.0 7555.847569 \n", - "1018 71.80 550000.0 7660.167131 \n", - "526 78.00 600000.0 7692.307692 \n", - "817 71.17 550000.0 7727.975270 \n", - "843 77.50 600000.0 7741.935484 \n", - "337 70.60 550000.0 7790.368272 \n", - "233 75.70 600000.0 7926.023778 \n", - "763 75.00 600000.0 8000.000000 \n", - "315 71.30 590000.0 8274.894811 \n", - "899 71.47 595000.0 8325.171401 \n", - "147 71.50 600000.0 8391.608392 \n", - "973 70.89 595000.0 8393.285372 \n", - "\n", - " url bedrooms \n", - "953 https://www.rightmove.co.uk/properties/145546538 2 \n", - "823 https://www.rightmove.co.uk/properties/145051769 2 \n", - "492 https://www.rightmove.co.uk/properties/143317361 2 \n", - "561 https://www.rightmove.co.uk/properties/143780789 2 \n", - "69 https://www.rightmove.co.uk/properties/134574563 2 \n", - "988 https://www.rightmove.co.uk/properties/86648925 2 \n", - "272 https://www.rightmove.co.uk/properties/141131297 2 \n", - "521 https://www.rightmove.co.uk/properties/143514149 2 \n", - "324 https://www.rightmove.co.uk/properties/141831353 2 \n", - "1018 https://www.rightmove.co.uk/properties/86804832 2 \n", - "526 https://www.rightmove.co.uk/properties/143552156 1 \n", - "817 https://www.rightmove.co.uk/properties/145035929 2 \n", - "843 https://www.rightmove.co.uk/properties/145144988 2 \n", - "337 https://www.rightmove.co.uk/properties/141904286 2 \n", - "233 https://www.rightmove.co.uk/properties/140582213 2 \n", - "763 https://www.rightmove.co.uk/properties/144862070 2 \n", - "315 https://www.rightmove.co.uk/properties/141669686 2 \n", - "899 https://www.rightmove.co.uk/properties/145362911 2 \n", - "147 https://www.rightmove.co.uk/properties/138537527 1 \n", - "973 https://www.rightmove.co.uk/properties/86296491 2 " - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df2 = df[(df.sqm_ocr > 70) & (df.price <=600000)& (df.price >500000)]\n", - "df2.sort_values('price_per_sqm')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b9f2ca7b-8476-47f1-a34f-7acdfbe84bd1", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "edd9fa24-cad2-4448-9b17-c6d514564f41", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sqm_ocrpriceprice_per_sqmurlbedrooms
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: [sqm_ocr, price, price_per_sqm, url, bedrooms]\n", - "Index: []" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df[df.bedrooms > 2]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "002b2a3a-3ecc-45c1-8c2f-c143380ee0d5", - "metadata": {}, - "outputs": [], - "source": [ - "\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.0" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/crawler/.ipynb_checkpoints/pyproject-checkpoint.toml b/crawler/.ipynb_checkpoints/pyproject-checkpoint.toml deleted file mode 100644 index 37aa46d..0000000 --- a/crawler/.ipynb_checkpoints/pyproject-checkpoint.toml +++ /dev/null @@ -1,25 +0,0 @@ -[tool.poetry] -name = "rec" -version = "0.1.0" -description = "" -authors = ["Kadir Tugan "] - -[tool.poetry.dependencies] -python = ">3.11" -SQLAlchemy = "^2.0.23" -requests = "^2.31.0" -cachetools = "^5.3.2" -diskcache = "^5.6.3" -tqdm = "^4.66.2" -pillow = "^10.2.0" -torch = "^2.2.1" -numpy = "^1.26.4" -transformers = "^4.38.2" -pytesseract = "^0.3.10" -jupyterlab = "^4.1.4" - -[tool.poetry.dev-dependencies] - -[build-system] -requires = ["poetry-core>=1.0.0"] -build-backend = "poetry.core.masonry.api" diff --git a/crawler/data/.gitignore b/crawler/data/.gitignore new file mode 100644 index 0000000..66533e7 --- /dev/null +++ b/crawler/data/.gitignore @@ -0,0 +1,2 @@ +rs +rs.tar.gz