diff --git a/crawler/2_dump_detail.py b/crawler/2_dump_detail.py index 49ed4f3..62cad80 100644 --- a/crawler/2_dump_detail.py +++ b/crawler/2_dump_detail.py @@ -9,7 +9,7 @@ filtered_listings = [] for listing in listings: if not listing.path_detail_json().exists(): filtered_listings.append(listing) - + for listing in tqdm(filtered_listings): try: d = detail_query(listing.identifier) diff --git a/crawler/exploration.ipynb b/crawler/exploration.ipynb index 3a63d40..5972562 100644 --- a/crawler/exploration.ipynb +++ b/crawler/exploration.ipynb @@ -3,7 +3,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "f20bddee-1e7c-4c46-a17a-c7bb6c13f30c", + "id": "38e8690a-f6f7-4e14-a657-f20605477afd", "metadata": {}, "outputs": [ { @@ -20,51 +20,172 @@ "import pandas as pd" ] }, + { + "cell_type": "markdown", + "id": "cfe2ab03-3204-4fd8-b76a-a734f6b87d75", + "metadata": {}, + "source": [ + "### Fetch previous decisions" + ] + }, { "cell_type": "code", "execution_count": 2, - "id": "b1101088-9613-465f-81fd-79801e0202b8", + "id": "db55b615-698c-4f5d-881a-ea1d3b6d6205", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(93, 2)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
identifierdecision
2047145699277n
1442144642851n
1839145394765n
1853145418669removed
930143205230n
\n", + "
" + ], + "text/plain": [ + " identifier decision\n", + "2047 145699277 n\n", + "1442 144642851 n\n", + "1839 145394765 n\n", + "1853 145418669 removed\n", + "930 143205230 n" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "ls = Listing.get_all_listings()\n", - "\n", - "decisions = {\n", - " 142789514: 'n',\n", - " 136010102: 'n',\n", - " 141457334: 'y',\n", - " 86778015: 'n',\n", - " 134574563: 'n',\n", - " 86648925: 'n',\n", - " 143319068: 'n',\n", - " 135668207: 'n',\n", - " 142063949: 'n',\n", - " 145051769: 'n',\n", - " 138945719: 'n',\n", - " 135714833: 'n',\n", - " 144983192: 'n',\n", - " 144666920: 'n',\n", - " 143895080: 'n',\n", - " 141114200: 'n',\n", - " 145407389: 'n',\n", - " 145047533: 'n',\n", - " 145161722: 'n',\n", - " 145130066: 'n',\n", - " 142110470: 'n',\n", - " 133667606: 'n',\n", - " 145005536: 'n',\n", - " 143458961: 'n',\n", - " 141412010: 'y',\n", - " 138683339: 'n',\n", - " 138490370: 'n',\n", - " 137805509: 'n',\n", - " 135854261: 'n',\n", - "}" + "decisions = pd.read_clipboard()\n", + "decisions = decisions.loc[decisions.decision.notna(), ['identifier', 'decision']]\n", + "print(decisions.shape)\n", + "decisions.head()\n" ] }, { "cell_type": "code", "execution_count": 3, + "id": "86224a20-53e1-403c-8d9f-71b9a9df750c", + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "output:\n", + "{145699277: 'n',\n", + " 144642851: 'n',\n", + " 145394765: 'n',\n", + " 145418669: 'removed',\n", + " 143205230: 'n',\n", + " 140628560: 'eigentlich geil',\n", + " ...\n", + "}\n", + "\"\"\"\n", + "decisions = decisions.set_index('identifier').decision.to_dict()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec257220-f170-41b8-9f9d-b8ef61512acf", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6dbd25bd-802d-4953-83c3-f01640174353", + "metadata": {}, + "outputs": [], + "source": [ + "# Use if we want to skip at the bottom\n", + "# decisions = {}" + ] + }, + { + "cell_type": "markdown", + "id": "7c1ee5eb-1000-4ced-983c-df47fb6ceae8", + "metadata": {}, + "source": [ + "### Get all data prepped for sheets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f20bddee-1e7c-4c46-a17a-c7bb6c13f30c", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "b1101088-9613-465f-81fd-79801e0202b8", + "metadata": {}, + "outputs": [], + "source": [ + "ls = Listing.get_all_listings()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, "id": "63e61601-7e3f-4d58-89f6-1794e4868cc3", "metadata": {}, "outputs": [], @@ -74,7 +195,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 7, "id": "1c222721-f426-42c0-9ac5-badc1f7a2034", "metadata": {}, "outputs": [ @@ -115,70 +236,70 @@ " \n", " \n", " 0\n", - " 102360773\n", + " 100506851\n", + " 58.4\n", + " 525000.0\n", + " 8989.726027\n", + " https://www.rightmove.co.uk/properties/100506851\n", + " 2\n", + " {'duration': 1948, 'distance': 10927, 'duratio...\n", + " {'duration': 1948, 'distance': 10927, 'duratio...\n", " NaN\n", - " 350000.0\n", - " NaN\n", - " https://www.rightmove.co.uk/properties/102360773\n", - " 1\n", - " {'duration': 2695, 'distance': 6467, 'duration...\n", - " {'duration': 1682, 'distance': 6810, 'duration...\n", - " 119.0\n", " False\n", " None\n", " \n", " \n", " 1\n", - " 105836849\n", + " 100938761\n", " NaN\n", - " 400000.0\n", - " NaN\n", - " https://www.rightmove.co.uk/properties/105836849\n", - " 3\n", - " {'duration': 2565, 'distance': 14070, 'duratio...\n", - " {'duration': 2565, 'distance': 14070, 'duratio...\n", + " 390000.0\n", " NaN\n", + " https://www.rightmove.co.uk/properties/100938761\n", + " 1\n", + " None\n", + " None\n", + " 996.0\n", " False\n", " None\n", " \n", " \n", " 2\n", - " 107233214\n", - " 76.91\n", - " 400000.0\n", - " 5200.884150\n", - " https://www.rightmove.co.uk/properties/107233214\n", - " 3\n", - " {'duration': 1714, 'distance': 9570, 'duration...\n", - " {'duration': 1774, 'distance': 9570, 'duration...\n", + " 101817179\n", + " 53.2\n", + " 495000.0\n", + " 9304.511278\n", + " https://www.rightmove.co.uk/properties/101817179\n", + " 1\n", + " {'duration': 2702, 'distance': 8637, 'duration...\n", + " {'duration': 3333, 'distance': 10013, 'duratio...\n", " 91.0\n", " False\n", " None\n", " \n", " \n", " 3\n", - " 107976896\n", - " 112.40\n", - " 800000.0\n", - " 7117.437722\n", - " https://www.rightmove.co.uk/properties/107976896\n", - " 3\n", - " {'duration': 1862, 'distance': 8278, 'duration...\n", - " {'duration': 1862, 'distance': 8278, 'duration...\n", - " NaN\n", + " 101939660\n", + " 56.5\n", + " 238000.0\n", + " 4212.389381\n", + " https://www.rightmove.co.uk/properties/101939660\n", + " 2\n", + " {'duration': 2262, 'distance': 13512, 'duratio...\n", + " {'duration': 2322, 'distance': 13491, 'duratio...\n", + " 0.0\n", " False\n", " None\n", " \n", " \n", " 4\n", - " 115499441\n", - " 115.60\n", - " 775000.0\n", - " 6704.152249\n", - " https://www.rightmove.co.uk/properties/115499441\n", - " 3\n", - " {'duration': 2943, 'distance': 7437, 'duration...\n", - " {'duration': 2167, 'distance': 9920, 'duration...\n", + " 102103157\n", + " NaN\n", + " 425000.0\n", + " NaN\n", + " https://www.rightmove.co.uk/properties/102103157\n", + " 1\n", + " None\n", + " None\n", " NaN\n", " False\n", " None\n", @@ -198,150 +319,150 @@ " ...\n", " \n", " \n", - " 2181\n", - " 86813508\n", - " NaN\n", - " 750000.0\n", - " NaN\n", - " https://www.rightmove.co.uk/properties/86813508\n", - " 3\n", - " {'duration': 2400, 'distance': 13983, 'duratio...\n", - " {'duration': 2605, 'distance': 14702, 'duratio...\n", - " NaN\n", - " True\n", - " None\n", - " \n", - " \n", - " 2182\n", - " 86813523\n", - " NaN\n", - " 655000.0\n", - " NaN\n", - " https://www.rightmove.co.uk/properties/86813523\n", - " 3\n", - " {'duration': 2400, 'distance': 13983, 'duratio...\n", - " {'duration': 2605, 'distance': 14702, 'duratio...\n", - " NaN\n", - " True\n", - " None\n", - " \n", - " \n", - " 2183\n", - " 86814669\n", - " 82.80\n", - " 550000.0\n", - " 6642.512077\n", - " https://www.rightmove.co.uk/properties/86814669\n", - " 3\n", - " {'duration': 2310, 'distance': 12972, 'duratio...\n", - " {'duration': 2322, 'distance': 12104, 'duratio...\n", - " 0.0\n", - " False\n", - " None\n", - " \n", - " \n", - " 2184\n", - " 86955958\n", - " 90.00\n", - " 300000.0\n", - " 3333.333333\n", - " https://www.rightmove.co.uk/properties/86955958\n", - " 3\n", - " {'duration': 2332, 'distance': 6898, 'duration...\n", - " {'duration': 2248, 'distance': 6893, 'duration...\n", - " 105.0\n", - " False\n", - " None\n", - " \n", - " \n", - " 2185\n", - " 86972726\n", - " 81.60\n", - " 790000.0\n", - " 9681.372549\n", - " https://www.rightmove.co.uk/properties/86972726\n", - " 2\n", - " {'duration': 1393, 'distance': 6390, 'duration...\n", - " {'duration': 2733, 'distance': 6490, 'duration...\n", + " 34537\n", + " 97023443\n", + " 8.3\n", + " 699999.0\n", + " 84337.228916\n", + " https://www.rightmove.co.uk/properties/97023443\n", + " 1\n", + " {'duration': 1704, 'distance': 8729, 'duration...\n", + " {'duration': 1713, 'distance': 6755, 'duration...\n", " 993.0\n", + " True\n", + " None\n", + " \n", + " \n", + " 34538\n", + " 97124237\n", + " 53.4\n", + " 300000.0\n", + " 5617.977528\n", + " https://www.rightmove.co.uk/properties/97124237\n", + " 1\n", + " None\n", + " None\n", + " NaN\n", " False\n", " None\n", " \n", + " \n", + " 34539\n", + " 97335680\n", + " 48.0\n", + " 315000.0\n", + " 6562.500000\n", + " https://www.rightmove.co.uk/properties/97335680\n", + " 2\n", + " None\n", + " None\n", + " NaN\n", + " False\n", + " None\n", + " \n", + " \n", + " 34540\n", + " 97522346\n", + " NaN\n", + " 400000.0\n", + " NaN\n", + " https://www.rightmove.co.uk/properties/97522346\n", + " 2\n", + " None\n", + " None\n", + " NaN\n", + " False\n", + " None\n", + " \n", + " \n", + " 34541\n", + " 98352914\n", + " NaN\n", + " 399950.0\n", + " NaN\n", + " https://www.rightmove.co.uk/properties/98352914\n", + " 2\n", + " None\n", + " None\n", + " 125.0\n", + " True\n", + " None\n", + " \n", " \n", "\n", - "

2186 rows × 11 columns

\n", + "

34542 rows × 11 columns

\n", "" ], "text/plain": [ - " identifier sqm_ocr price price_per_sqm \\\n", - "0 102360773 NaN 350000.0 NaN \n", - "1 105836849 NaN 400000.0 NaN \n", - "2 107233214 76.91 400000.0 5200.884150 \n", - "3 107976896 112.40 800000.0 7117.437722 \n", - "4 115499441 115.60 775000.0 6704.152249 \n", - "... ... ... ... ... \n", - "2181 86813508 NaN 750000.0 NaN \n", - "2182 86813523 NaN 655000.0 NaN \n", - "2183 86814669 82.80 550000.0 6642.512077 \n", - "2184 86955958 90.00 300000.0 3333.333333 \n", - "2185 86972726 81.60 790000.0 9681.372549 \n", + " identifier sqm_ocr price price_per_sqm \\\n", + "0 100506851 58.4 525000.0 8989.726027 \n", + "1 100938761 NaN 390000.0 NaN \n", + "2 101817179 53.2 495000.0 9304.511278 \n", + "3 101939660 56.5 238000.0 4212.389381 \n", + "4 102103157 NaN 425000.0 NaN \n", + "... ... ... ... ... \n", + "34537 97023443 8.3 699999.0 84337.228916 \n", + "34538 97124237 53.4 300000.0 5617.977528 \n", + "34539 97335680 48.0 315000.0 6562.500000 \n", + "34540 97522346 NaN 400000.0 NaN \n", + "34541 98352914 NaN 399950.0 NaN \n", "\n", - " url bedrooms \\\n", - "0 https://www.rightmove.co.uk/properties/102360773 1 \n", - "1 https://www.rightmove.co.uk/properties/105836849 3 \n", - "2 https://www.rightmove.co.uk/properties/107233214 3 \n", - "3 https://www.rightmove.co.uk/properties/107976896 3 \n", - "4 https://www.rightmove.co.uk/properties/115499441 3 \n", - "... ... ... \n", - "2181 https://www.rightmove.co.uk/properties/86813508 3 \n", - "2182 https://www.rightmove.co.uk/properties/86813523 3 \n", - "2183 https://www.rightmove.co.uk/properties/86814669 3 \n", - "2184 https://www.rightmove.co.uk/properties/86955958 3 \n", - "2185 https://www.rightmove.co.uk/properties/86972726 2 \n", + " url bedrooms \\\n", + "0 https://www.rightmove.co.uk/properties/100506851 2 \n", + "1 https://www.rightmove.co.uk/properties/100938761 1 \n", + "2 https://www.rightmove.co.uk/properties/101817179 1 \n", + "3 https://www.rightmove.co.uk/properties/101939660 2 \n", + "4 https://www.rightmove.co.uk/properties/102103157 1 \n", + "... ... ... \n", + "34537 https://www.rightmove.co.uk/properties/97023443 1 \n", + "34538 https://www.rightmove.co.uk/properties/97124237 1 \n", + "34539 https://www.rightmove.co.uk/properties/97335680 2 \n", + "34540 https://www.rightmove.co.uk/properties/97522346 2 \n", + "34541 https://www.rightmove.co.uk/properties/98352914 2 \n", "\n", - " travel_time_fastest \\\n", - "0 {'duration': 2695, 'distance': 6467, 'duration... \n", - "1 {'duration': 2565, 'distance': 14070, 'duratio... \n", - "2 {'duration': 1714, 'distance': 9570, 'duration... \n", - "3 {'duration': 1862, 'distance': 8278, 'duration... \n", - "4 {'duration': 2943, 'distance': 7437, 'duration... \n", - "... ... \n", - "2181 {'duration': 2400, 'distance': 13983, 'duratio... \n", - "2182 {'duration': 2400, 'distance': 13983, 'duratio... \n", - "2183 {'duration': 2310, 'distance': 12972, 'duratio... \n", - "2184 {'duration': 2332, 'distance': 6898, 'duration... \n", - "2185 {'duration': 1393, 'distance': 6390, 'duration... \n", + " travel_time_fastest \\\n", + "0 {'duration': 1948, 'distance': 10927, 'duratio... \n", + "1 None \n", + "2 {'duration': 2702, 'distance': 8637, 'duration... \n", + "3 {'duration': 2262, 'distance': 13512, 'duratio... \n", + "4 None \n", + "... ... \n", + "34537 {'duration': 1704, 'distance': 8729, 'duration... \n", + "34538 None \n", + "34539 None \n", + "34540 None \n", + "34541 None \n", "\n", - " travel_time_second lease_left \\\n", - "0 {'duration': 1682, 'distance': 6810, 'duration... 119.0 \n", - "1 {'duration': 2565, 'distance': 14070, 'duratio... NaN \n", - "2 {'duration': 1774, 'distance': 9570, 'duration... 91.0 \n", - "3 {'duration': 1862, 'distance': 8278, 'duration... NaN \n", - "4 {'duration': 2167, 'distance': 9920, 'duration... NaN \n", - "... ... ... \n", - "2181 {'duration': 2605, 'distance': 14702, 'duratio... NaN \n", - "2182 {'duration': 2605, 'distance': 14702, 'duratio... NaN \n", - "2183 {'duration': 2322, 'distance': 12104, 'duratio... 0.0 \n", - "2184 {'duration': 2248, 'distance': 6893, 'duration... 105.0 \n", - "2185 {'duration': 2733, 'distance': 6490, 'duration... 993.0 \n", + " travel_time_second lease_left \\\n", + "0 {'duration': 1948, 'distance': 10927, 'duratio... NaN \n", + "1 None 996.0 \n", + "2 {'duration': 3333, 'distance': 10013, 'duratio... 91.0 \n", + "3 {'duration': 2322, 'distance': 13491, 'duratio... 0.0 \n", + "4 None NaN \n", + "... ... ... \n", + "34537 {'duration': 1713, 'distance': 6755, 'duration... 993.0 \n", + "34538 None NaN \n", + "34539 None NaN \n", + "34540 None NaN \n", + "34541 None 125.0 \n", "\n", - " development decision \n", - "0 False None \n", - "1 False None \n", - "2 False None \n", - "3 False None \n", - "4 False None \n", - "... ... ... \n", - "2181 True None \n", - "2182 True None \n", - "2183 False None \n", - "2184 False None \n", - "2185 False None \n", + " development decision \n", + "0 False None \n", + "1 False None \n", + "2 False None \n", + "3 False None \n", + "4 False None \n", + "... ... ... \n", + "34537 True None \n", + "34538 False None \n", + "34539 False None \n", + "34540 False None \n", + "34541 True None \n", "\n", - "[2186 rows x 11 columns]" + "[34542 rows x 11 columns]" ] }, - "execution_count": 4, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -354,17 +475,17 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 8, "id": "d80d9911-9a6d-4608-a6da-11dc864ee32b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(2186, 11)" + "(34542, 11)" ] }, - "execution_count": 5, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -375,7 +496,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 9, "id": "7b37ad6b-9b0a-444e-b8c3-6fe4e43e42cb", "metadata": {}, "outputs": [ @@ -423,45 +544,45 @@ " \n", " \n", " 0\n", - " 102360773\n", + " 100506851\n", + " 58.4\n", + " 525000.0\n", + " 8989.726027\n", + " https://www.rightmove.co.uk/properties/100506851\n", + " 2\n", " NaN\n", - " 350000.0\n", - " NaN\n", - " https://www.rightmove.co.uk/properties/102360773\n", - " 1\n", - " 119.0\n", " False\n", " None\n", - " 2695\n", - " 174\n", - " {'WALK': 414, 'TRANSIT': 2280}\n", - " 1\n", - " 1682.0\n", - " 204.0\n", - " {'WALK': 608, 'TRANSIT': 804}\n", + " 1948.0\n", + " 161.0\n", + " {'WALK': 481, 'TRANSIT': 1200}\n", " 2.0\n", - " 28.0\n", + " 1948.0\n", + " 161.0\n", + " {'WALK': 481, 'TRANSIT': 1200}\n", + " 2.0\n", + " 32.0\n", " \n", " \n", " 1\n", - " 105836849\n", + " 100938761\n", " NaN\n", - " 400000.0\n", - " NaN\n", - " https://www.rightmove.co.uk/properties/105836849\n", - " 3\n", + " 390000.0\n", " NaN\n", + " https://www.rightmove.co.uk/properties/100938761\n", + " 1\n", + " 996.0\n", " False\n", " None\n", - " 2565\n", - " 340\n", - " {'WALK': 750, 'TRANSIT': 1800}\n", - " 1\n", - " 2565.0\n", - " 340.0\n", - " {'WALK': 750, 'TRANSIT': 1800}\n", - " 1.0\n", - " 43.0\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " \n", " \n", "\n", @@ -469,61 +590,61 @@ ], "text/plain": [ " identifier sqm_ocr price price_per_sqm \\\n", - "0 102360773 NaN 350000.0 NaN \n", - "1 105836849 NaN 400000.0 NaN \n", + "0 100506851 58.4 525000.0 8989.726027 \n", + "1 100938761 NaN 390000.0 NaN \n", "\n", " url bedrooms lease_left \\\n", - "0 https://www.rightmove.co.uk/properties/102360773 1 119.0 \n", - "1 https://www.rightmove.co.uk/properties/105836849 3 NaN \n", + "0 https://www.rightmove.co.uk/properties/100506851 2 NaN \n", + "1 https://www.rightmove.co.uk/properties/100938761 1 996.0 \n", "\n", " development decision a_duration a_initial_walk_duration \\\n", - "0 False None 2695 174 \n", - "1 False None 2565 340 \n", + "0 False None 1948.0 161.0 \n", + "1 False None NaN NaN \n", "\n", " a_duration_per_transit a_number_of_transit_stops b_duration \\\n", - "0 {'WALK': 414, 'TRANSIT': 2280} 1 1682.0 \n", - "1 {'WALK': 750, 'TRANSIT': 1800} 1 2565.0 \n", + "0 {'WALK': 481, 'TRANSIT': 1200} 2.0 1948.0 \n", + "1 NaN NaN NaN \n", "\n", " b_initial_walk_duration b_duration_per_transit \\\n", - "0 204.0 {'WALK': 608, 'TRANSIT': 804} \n", - "1 340.0 {'WALK': 750, 'TRANSIT': 1800} \n", + "0 161.0 {'WALK': 481, 'TRANSIT': 1200} \n", + "1 NaN NaN \n", "\n", " b_number_of_transit_stops min_duration \n", - "0 2.0 28.0 \n", - "1 1.0 43.0 " + "0 2.0 32.0 \n", + "1 NaN NaN " ] }, - "execution_count": 6, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "dropcolumns = ['distance_per_transit', 'duration_static', 'distance']\n", - "s1 = df['travel_time_fastest'].apply(pd.Series).drop(dropcolumns, axis=1)\n", - "s1.columns = ['a_' + c for c in s1.columns]\n", + "# dropcolumns = ['distance_per_transit', 'duration_static', 'distance']\n", + "# s1 = df['travel_time_fastest'].apply(pd.Series).drop(dropcolumns, axis=1)\n", + "# s1.columns = ['a_' + c for c in s1.columns]\n", "\n", - "s2 = df['travel_time_second'].apply(pd.Series).drop(dropcolumns, axis=1)\n", - "s2.columns = ['b_' + c for c in s2.columns]\n", + "# s2 = df['travel_time_second'].apply(pd.Series).drop(dropcolumns, axis=1)\n", + "# s2.columns = ['b_' + c for c in s2.columns]\n", "\n", - "df2 = pd.concat([df.drop(['travel_time_fastest', 'travel_time_second'], axis=1), s1, s2], axis=1)\n", - "df2.loc[:, 'min_duration'] = (df2.loc[:, ['a_duration', 'b_duration']].min(axis=1) / 60).round()\n", - "df2.head(2)" + "# df2 = pd.concat([df.drop(['travel_time_fastest', 'travel_time_second'], axis=1), s1, s2], axis=1)\n", + "# df2.loc[:, 'min_duration'] = (df2.loc[:, ['a_duration', 'b_duration']].min(axis=1) / 60).round()\n", + "# df2.head(2)" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 10, "id": "8c75aaa6-6113-482f-809b-11e405510184", "metadata": {}, "outputs": [], "source": [ - "df2.to_clipboard()" + "# df2.to_clipboard()" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 13, "id": "79f99692-91e8-4915-9b57-7b3a1efd7d3a", "metadata": {}, "outputs": [ @@ -566,35 +687,35 @@ " \n", " \n", " 0\n", - " 102360773\n", + " 100506851\n", + " 58.4\n", + " 525000.0\n", + " 8989.726027\n", + " https://www.rightmove.co.uk/properties/100506851\n", + " 2\n", " NaN\n", - " 350000.0\n", - " NaN\n", - " https://www.rightmove.co.uk/properties/102360773\n", - " 1\n", - " 119.0\n", " False\n", " None\n", - " 45\n", - " 174\n", - " {'WALK': 414, 'TRANSIT': 2280}\n", - " 1\n", + " 32.0\n", + " 161.0\n", + " {'WALK': 481, 'TRANSIT': 1200}\n", + " 2.0\n", " \n", " \n", " 1\n", - " 105836849\n", + " 100938761\n", " NaN\n", - " 400000.0\n", - " NaN\n", - " https://www.rightmove.co.uk/properties/105836849\n", - " 3\n", + " 390000.0\n", " NaN\n", + " https://www.rightmove.co.uk/properties/100938761\n", + " 1\n", + " 996.0\n", " False\n", " None\n", - " 43\n", - " 340\n", - " {'WALK': 750, 'TRANSIT': 1800}\n", - " 1\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " \n", " \n", "\n", @@ -602,23 +723,23 @@ ], "text/plain": [ " identifier sqm_ocr price price_per_sqm \\\n", - "0 102360773 NaN 350000.0 NaN \n", - "1 105836849 NaN 400000.0 NaN \n", + "0 100506851 58.4 525000.0 8989.726027 \n", + "1 100938761 NaN 390000.0 NaN \n", "\n", " url bedrooms lease_left \\\n", - "0 https://www.rightmove.co.uk/properties/102360773 1 119.0 \n", - "1 https://www.rightmove.co.uk/properties/105836849 3 NaN \n", + "0 https://www.rightmove.co.uk/properties/100506851 2 NaN \n", + "1 https://www.rightmove.co.uk/properties/100938761 1 996.0 \n", "\n", " development decision duration initial_walk_duration \\\n", - "0 False None 45 174 \n", - "1 False None 43 340 \n", + "0 False None 32.0 161.0 \n", + "1 False None NaN NaN \n", "\n", " duration_per_transit number_of_transit_stops \n", - "0 {'WALK': 414, 'TRANSIT': 2280} 1 \n", - "1 {'WALK': 750, 'TRANSIT': 1800} 1 " + "0 {'WALK': 481, 'TRANSIT': 1200} 2.0 \n", + "1 NaN NaN " ] }, - "execution_count": 8, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -635,1029 +756,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "abcbde40-7432-4449-957a-79ce2ca126eb", "metadata": {}, "outputs": [], "source": [ - "\n" + "df3[df3.duration < 45].to_clipboard()\n" ] }, - { - "cell_type": "code", - "execution_count": 5, - "id": "99c5b304-3d13-466b-a9f5-83a5db6311b5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sqm_ocrpriceprice_per_sqmurlbedrooms
95374142.40550000.07.418157https://www.rightmove.co.uk/properties/1455465382
24921850.10725000.033.180626https://www.rightmove.co.uk/properties/1408217362
177137.50695000.05054.545455https://www.rightmove.co.uk/properties/1393542592
35285.50475000.05555.555556https://www.rightmove.co.uk/properties/1421423482
3682.90475000.05729.794934https://www.rightmove.co.uk/properties/1289259502
700101.00650000.06435.643564https://www.rightmove.co.uk/properties/1445915721
82380.40525000.06529.850746https://www.rightmove.co.uk/properties/1450517692
21491.51650000.07103.048847https://www.rightmove.co.uk/properties/1403263072
17191.10650000.07135.016465https://www.rightmove.co.uk/properties/1392454282
598109.40795000.07266.910420https://www.rightmove.co.uk/properties/1440346552
14693.78700000.07464.278098https://www.rightmove.co.uk/properties/1385103952
711100.31750000.07476.821852https://www.rightmove.co.uk/properties/1446203032
59286.49650000.07515.319690https://www.rightmove.co.uk/properties/1439876692
624101.35775000.07646.768624https://www.rightmove.co.uk/properties/1442179221
5597.60750000.07684.426230https://www.rightmove.co.uk/properties/1325647372
85181.01625000.07715.096902https://www.rightmove.co.uk/properties/1451725042
35689.00695000.07808.988764https://www.rightmove.co.uk/properties/1421856232
74885.90675000.07857.974389https://www.rightmove.co.uk/properties/1447712812
20482.40650000.07888.349515https://www.rightmove.co.uk/properties/1401733192
65483.33665000.07980.319213https://www.rightmove.co.uk/properties/1443611002
51487.50700000.08000.000000https://www.rightmove.co.uk/properties/1434603651
76292.90750000.08073.196986https://www.rightmove.co.uk/properties/1448467251
96392.90750000.08073.196986https://www.rightmove.co.uk/properties/1455652522
93886.10699000.08118.466899https://www.rightmove.co.uk/properties/1454911372
40390.60750000.08278.145695https://www.rightmove.co.uk/properties/1427044162
30293.30795000.08520.900322https://www.rightmove.co.uk/properties/1415954332
67193.27800000.08577.248847https://www.rightmove.co.uk/properties/1444291402
34980.57695000.08626.039469https://www.rightmove.co.uk/properties/1421159182
22490.10800000.08879.023307https://www.rightmove.co.uk/properties/1404644812
74081.70735000.08996.328029https://www.rightmove.co.uk/properties/1447224142
32687.30800000.09163.802978https://www.rightmove.co.uk/properties/1418460232
31981.57750000.09194.556822https://www.rightmove.co.uk/properties/1417973572
55886.77800000.09219.776420https://www.rightmove.co.uk/properties/1437587632
71286.30800000.09269.988413https://www.rightmove.co.uk/properties/1446221572
47386.00800000.09302.325581https://www.rightmove.co.uk/properties/1432101022
10580.40750000.09328.358209https://www.rightmove.co.uk/properties/1369887262
100380.27750000.09343.465803https://www.rightmove.co.uk/properties/867752912
23582.80775000.09359.903382https://www.rightmove.co.uk/properties/1406110552
6585.10800000.09400.705053https://www.rightmove.co.uk/properties/1341162322
3083.70795000.09498.207885https://www.rightmove.co.uk/properties/1277879601
102581.60790000.09681.372549https://www.rightmove.co.uk/properties/869727262
8881.75800000.09785.932722https://www.rightmove.co.uk/properties/1360121931
45480.80800000.09900.990099https://www.rightmove.co.uk/properties/1431388672
34380.64800000.09920.634921https://www.rightmove.co.uk/properties/1420329352
\n", - "
" - ], - "text/plain": [ - " sqm_ocr price price_per_sqm \\\n", - "953 74142.40 550000.0 7.418157 \n", - "249 21850.10 725000.0 33.180626 \n", - "177 137.50 695000.0 5054.545455 \n", - "352 85.50 475000.0 5555.555556 \n", - "36 82.90 475000.0 5729.794934 \n", - "700 101.00 650000.0 6435.643564 \n", - "823 80.40 525000.0 6529.850746 \n", - "214 91.51 650000.0 7103.048847 \n", - "171 91.10 650000.0 7135.016465 \n", - "598 109.40 795000.0 7266.910420 \n", - "146 93.78 700000.0 7464.278098 \n", - "711 100.31 750000.0 7476.821852 \n", - "592 86.49 650000.0 7515.319690 \n", - "624 101.35 775000.0 7646.768624 \n", - "55 97.60 750000.0 7684.426230 \n", - "851 81.01 625000.0 7715.096902 \n", - "356 89.00 695000.0 7808.988764 \n", - "748 85.90 675000.0 7857.974389 \n", - "204 82.40 650000.0 7888.349515 \n", - "654 83.33 665000.0 7980.319213 \n", - "514 87.50 700000.0 8000.000000 \n", - "762 92.90 750000.0 8073.196986 \n", - "963 92.90 750000.0 8073.196986 \n", - "938 86.10 699000.0 8118.466899 \n", - "403 90.60 750000.0 8278.145695 \n", - "302 93.30 795000.0 8520.900322 \n", - "671 93.27 800000.0 8577.248847 \n", - "349 80.57 695000.0 8626.039469 \n", - "224 90.10 800000.0 8879.023307 \n", - "740 81.70 735000.0 8996.328029 \n", - "326 87.30 800000.0 9163.802978 \n", - "319 81.57 750000.0 9194.556822 \n", - "558 86.77 800000.0 9219.776420 \n", - "712 86.30 800000.0 9269.988413 \n", - "473 86.00 800000.0 9302.325581 \n", - "105 80.40 750000.0 9328.358209 \n", - "1003 80.27 750000.0 9343.465803 \n", - "235 82.80 775000.0 9359.903382 \n", - "65 85.10 800000.0 9400.705053 \n", - "30 83.70 795000.0 9498.207885 \n", - "1025 81.60 790000.0 9681.372549 \n", - "88 81.75 800000.0 9785.932722 \n", - "454 80.80 800000.0 9900.990099 \n", - "343 80.64 800000.0 9920.634921 \n", - "\n", - " url bedrooms \n", - "953 https://www.rightmove.co.uk/properties/145546538 2 \n", - "249 https://www.rightmove.co.uk/properties/140821736 2 \n", - "177 https://www.rightmove.co.uk/properties/139354259 2 \n", - "352 https://www.rightmove.co.uk/properties/142142348 2 \n", - "36 https://www.rightmove.co.uk/properties/128925950 2 \n", - "700 https://www.rightmove.co.uk/properties/144591572 1 \n", - "823 https://www.rightmove.co.uk/properties/145051769 2 \n", - "214 https://www.rightmove.co.uk/properties/140326307 2 \n", - "171 https://www.rightmove.co.uk/properties/139245428 2 \n", - "598 https://www.rightmove.co.uk/properties/144034655 2 \n", - "146 https://www.rightmove.co.uk/properties/138510395 2 \n", - "711 https://www.rightmove.co.uk/properties/144620303 2 \n", - "592 https://www.rightmove.co.uk/properties/143987669 2 \n", - "624 https://www.rightmove.co.uk/properties/144217922 1 \n", - "55 https://www.rightmove.co.uk/properties/132564737 2 \n", - "851 https://www.rightmove.co.uk/properties/145172504 2 \n", - "356 https://www.rightmove.co.uk/properties/142185623 2 \n", - "748 https://www.rightmove.co.uk/properties/144771281 2 \n", - "204 https://www.rightmove.co.uk/properties/140173319 2 \n", - "654 https://www.rightmove.co.uk/properties/144361100 2 \n", - "514 https://www.rightmove.co.uk/properties/143460365 1 \n", - "762 https://www.rightmove.co.uk/properties/144846725 1 \n", - "963 https://www.rightmove.co.uk/properties/145565252 2 \n", - "938 https://www.rightmove.co.uk/properties/145491137 2 \n", - "403 https://www.rightmove.co.uk/properties/142704416 2 \n", - "302 https://www.rightmove.co.uk/properties/141595433 2 \n", - "671 https://www.rightmove.co.uk/properties/144429140 2 \n", - "349 https://www.rightmove.co.uk/properties/142115918 2 \n", - "224 https://www.rightmove.co.uk/properties/140464481 2 \n", - "740 https://www.rightmove.co.uk/properties/144722414 2 \n", - "326 https://www.rightmove.co.uk/properties/141846023 2 \n", - "319 https://www.rightmove.co.uk/properties/141797357 2 \n", - "558 https://www.rightmove.co.uk/properties/143758763 2 \n", - "712 https://www.rightmove.co.uk/properties/144622157 2 \n", - "473 https://www.rightmove.co.uk/properties/143210102 2 \n", - "105 https://www.rightmove.co.uk/properties/136988726 2 \n", - "1003 https://www.rightmove.co.uk/properties/86775291 2 \n", - "235 https://www.rightmove.co.uk/properties/140611055 2 \n", - "65 https://www.rightmove.co.uk/properties/134116232 2 \n", - "30 https://www.rightmove.co.uk/properties/127787960 1 \n", - "1025 https://www.rightmove.co.uk/properties/86972726 2 \n", - "88 https://www.rightmove.co.uk/properties/136012193 1 \n", - "454 https://www.rightmove.co.uk/properties/143138867 2 \n", - "343 https://www.rightmove.co.uk/properties/142032935 2 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df2 = df[df.sqm_ocr > 80]\n", - "df2.sort_values('price_per_sqm')" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "52545cfa-0932-46fe-ba7e-961fd43f2786", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sqm_ocrpriceprice_per_sqmurlbedrooms
35973.40400000.05449.591281https://www.rightmove.co.uk/properties/1421869912
29377.00425000.05519.480519https://www.rightmove.co.uk/properties/1414377832
35285.50475000.05555.555556https://www.rightmove.co.uk/properties/1421423482
68576.03425000.05589.898724https://www.rightmove.co.uk/properties/1444940122
3682.90475000.05729.794934https://www.rightmove.co.uk/properties/1289259502
49171.68450000.06277.901786https://www.rightmove.co.uk/properties/1433158402
102073.67495000.06719.152980https://www.rightmove.co.uk/properties/868079162
\n", - "
" - ], - "text/plain": [ - " sqm_ocr price price_per_sqm \\\n", - "359 73.40 400000.0 5449.591281 \n", - "293 77.00 425000.0 5519.480519 \n", - "352 85.50 475000.0 5555.555556 \n", - "685 76.03 425000.0 5589.898724 \n", - "36 82.90 475000.0 5729.794934 \n", - "491 71.68 450000.0 6277.901786 \n", - "1020 73.67 495000.0 6719.152980 \n", - "\n", - " url bedrooms \n", - "359 https://www.rightmove.co.uk/properties/142186991 2 \n", - "293 https://www.rightmove.co.uk/properties/141437783 2 \n", - "352 https://www.rightmove.co.uk/properties/142142348 2 \n", - "685 https://www.rightmove.co.uk/properties/144494012 2 \n", - "36 https://www.rightmove.co.uk/properties/128925950 2 \n", - "491 https://www.rightmove.co.uk/properties/143315840 2 \n", - "1020 https://www.rightmove.co.uk/properties/86807916 2 " - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df2 = df[(df.sqm_ocr > 70) & (df.price <500000)]\n", - "df2.sort_values('price_per_sqm')" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "d0246926-13ef-4110-8e3a-fb676a55c2a6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sqm_ocrpriceprice_per_sqmurlbedrooms
95374142.40550000.07.418157https://www.rightmove.co.uk/properties/1455465382
82380.40525000.06529.850746https://www.rightmove.co.uk/properties/1450517692
49273.90525000.07104.194858https://www.rightmove.co.uk/properties/1433173612
56176.70550000.07170.795306https://www.rightmove.co.uk/properties/1437807892
6973.10525000.07181.942544https://www.rightmove.co.uk/properties/1345745632
98872.40525000.07251.381215https://www.rightmove.co.uk/properties/866489252
27277.89575000.07382.205675https://www.rightmove.co.uk/properties/1411312972
52170.88525000.07406.884876https://www.rightmove.co.uk/properties/1435141492
32476.10575000.07555.847569https://www.rightmove.co.uk/properties/1418313532
101871.80550000.07660.167131https://www.rightmove.co.uk/properties/868048322
52678.00600000.07692.307692https://www.rightmove.co.uk/properties/1435521561
81771.17550000.07727.975270https://www.rightmove.co.uk/properties/1450359292
84377.50600000.07741.935484https://www.rightmove.co.uk/properties/1451449882
33770.60550000.07790.368272https://www.rightmove.co.uk/properties/1419042862
23375.70600000.07926.023778https://www.rightmove.co.uk/properties/1405822132
76375.00600000.08000.000000https://www.rightmove.co.uk/properties/1448620702
31571.30590000.08274.894811https://www.rightmove.co.uk/properties/1416696862
89971.47595000.08325.171401https://www.rightmove.co.uk/properties/1453629112
14771.50600000.08391.608392https://www.rightmove.co.uk/properties/1385375271
97370.89595000.08393.285372https://www.rightmove.co.uk/properties/862964912
\n", - "
" - ], - "text/plain": [ - " sqm_ocr price price_per_sqm \\\n", - "953 74142.40 550000.0 7.418157 \n", - "823 80.40 525000.0 6529.850746 \n", - "492 73.90 525000.0 7104.194858 \n", - "561 76.70 550000.0 7170.795306 \n", - "69 73.10 525000.0 7181.942544 \n", - "988 72.40 525000.0 7251.381215 \n", - "272 77.89 575000.0 7382.205675 \n", - "521 70.88 525000.0 7406.884876 \n", - "324 76.10 575000.0 7555.847569 \n", - "1018 71.80 550000.0 7660.167131 \n", - "526 78.00 600000.0 7692.307692 \n", - "817 71.17 550000.0 7727.975270 \n", - "843 77.50 600000.0 7741.935484 \n", - "337 70.60 550000.0 7790.368272 \n", - "233 75.70 600000.0 7926.023778 \n", - "763 75.00 600000.0 8000.000000 \n", - "315 71.30 590000.0 8274.894811 \n", - "899 71.47 595000.0 8325.171401 \n", - "147 71.50 600000.0 8391.608392 \n", - "973 70.89 595000.0 8393.285372 \n", - "\n", - " url bedrooms \n", - "953 https://www.rightmove.co.uk/properties/145546538 2 \n", - "823 https://www.rightmove.co.uk/properties/145051769 2 \n", - "492 https://www.rightmove.co.uk/properties/143317361 2 \n", - "561 https://www.rightmove.co.uk/properties/143780789 2 \n", - "69 https://www.rightmove.co.uk/properties/134574563 2 \n", - "988 https://www.rightmove.co.uk/properties/86648925 2 \n", - "272 https://www.rightmove.co.uk/properties/141131297 2 \n", - "521 https://www.rightmove.co.uk/properties/143514149 2 \n", - "324 https://www.rightmove.co.uk/properties/141831353 2 \n", - "1018 https://www.rightmove.co.uk/properties/86804832 2 \n", - "526 https://www.rightmove.co.uk/properties/143552156 1 \n", - "817 https://www.rightmove.co.uk/properties/145035929 2 \n", - "843 https://www.rightmove.co.uk/properties/145144988 2 \n", - "337 https://www.rightmove.co.uk/properties/141904286 2 \n", - "233 https://www.rightmove.co.uk/properties/140582213 2 \n", - "763 https://www.rightmove.co.uk/properties/144862070 2 \n", - "315 https://www.rightmove.co.uk/properties/141669686 2 \n", - "899 https://www.rightmove.co.uk/properties/145362911 2 \n", - "147 https://www.rightmove.co.uk/properties/138537527 1 \n", - "973 https://www.rightmove.co.uk/properties/86296491 2 " - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df2 = df[(df.sqm_ocr > 70) & (df.price <=600000)& (df.price >500000)]\n", - "df2.sort_values('price_per_sqm')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b9f2ca7b-8476-47f1-a34f-7acdfbe84bd1", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "edd9fa24-cad2-4448-9b17-c6d514564f41", - "metadata": {}, - "outputs": [], - "source": [ - "from data_access import Listing\n", - "import pytesseract\n", - "from PIL import Image\n", - "import re" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "002b2a3a-3ecc-45c1-8c2f-c143380ee0d5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[('3.14', 'm'), ('3.43', 'm'), ('3.89', 'm'), ('3.62', 'm'), ('3.89', 'm'), ('2.88', 'm'), ('75.3', 'sq. m'), ('4.07', 'm'), ('6.18', 'm'), ('2.79', 'm'), ('3.34', 'm'), ('2.79', 'm'), ('4.34', 'm'), ('1.76', 'm'), ('2.29', 'm'), ('1.76', 'm'), ('1.92', 'm'), ('75.3', 'sq. m')]\n", - "-----\n", - "Bedroom 2\n", - "Bedroom 1 3.14m x 3.43m\n", - "3.89m x 3.62m (10'4\" x 11'3\")\n", - "(12'9\" x 11'10\")\n", - "\n", - "Bedroom 3\n", - "3.89m x 2.88m\n", - "(12'9\" x 9'5\")\n", - "\n", - "xt\n", - "\n", - "v\n", - "\n", - "Levita House NW1\n", - "\n", - "Approx. 75.3 sq. metres (810.2 sq. feet)\n", - "\n", - "Storage\n", - "\n", - ". Hall\n", - "Kitchen 4.07m x 6.18m\n", - "2.79m x 3.34m (13'4\" x 20'3\") Bedroom 4\n", - "\n", - "(9'2\" x 10'11\") 2.79m x 4.34m\n", - "\n", - "(9'2\" x 14'3\")\n", - "\n", - "Bathroom 1\n", - "1.76m x 2.29m\n", - "(5'9\" x 76\")\n", - "\n", - "Bathroom 2\n", - "1.76m x 1.92m\n", - "(5'9\" x 64\")\n", - "\n", - "Total area: approx. 75.3 sq. metres (810.2 sq. feet)\n", - "\n" - ] - } - ], - "source": [ - "l = Listing(144497822)\n", - "for path in l.path_floorplan_folder().iterdir():\n", - " img = Image.open(path)\n", - " text = pytesseract.image_to_string(img)\n", - " sqmregex = r'(\\d+\\.\\d*) ?(sq ?m|sq. ?m|m)'\n", - " matches = re.findall(sqmregex, text.lower())\n", - " print(matches)\n", - " print(\"-----\")\n", - " print(text)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "015e870d-0cf0-4d07-a9ae-4e80d128b26c", - "metadata": {}, - "outputs": [], - "source": [ - "import enum" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "577dcd5f-07df-4d61-b837-a90db59e3ed5", - "metadata": {}, - "outputs": [], - "source": [ - "class PropertyType(enum.StrEnum): \n", - " BUNGALOW= \"bungalow\"\n", - " DETACHED= \"detached\"\n", - " FLAT= \"flat\"\n", - " LAND= \"land\"\n", - " PARK_HOME= \"park-home\"\n", - " SEMI_DETACHED= \"semi-detached\"\n", - " TERRACED= \"terraced\"" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "50cc2eb3-1c3b-49b8-86a3-73dd2d151a61", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'bungalow,park-home'" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "x = [PropertyType.BUNGALOW,PropertyType.PARK_HOME]\n", - "','.join(x)" - ] - }, - { - "cell_type": "markdown", - "id": "87ead853-8a71-4de9-98d1-f4f2673a5592", - "metadata": {}, - "source": [] - }, { "cell_type": "markdown", "id": "98f8e950-2a3b-4856-aa62-3bc758e2fd42", @@ -1912,6 +1018,136 @@ "metadata": {}, "outputs": [], "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "386578dc-1ad5-4b8a-8905-29b0c47a6174", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10c17fdf-f424-40cb-9d8c-9218f8d4ab53", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "88d99eb7-8c92-4817-86ce-ba0738331dba", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6c8b4488-ae2b-41ab-9c95-e3c85f9fb77e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3072907-7ad8-4618-92ab-818e392218d9", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b20f6f16-3236-4772-b1a3-2d4a3b1925a6", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1a7230dc-1a0f-43e2-bd15-0c85ea445733", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aeaf84bf-8514-48c6-88ce-2c6828bdcdf2", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c888d4e6-d192-45df-b9b6-5e2d39bca344", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "cd71db7f-ba11-4d5d-a183-768ed4db23ba", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/kadir/code/realestate/crawler/venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "from data_access import Listing" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d0ced84b-ee91-4642-b2ff-dd32d9f1e437", + "metadata": {}, + "outputs": [], + "source": [ + "l = Listing(133604363)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "7157f5f7-65b3-4232-bcae-26b93e5d93e6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "6395.76" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "l.serviceCharge" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3f453f9c-bdaa-4713-8220-c504f1a436ae", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/crawler/rec/districts.py b/crawler/rec/districts.py index 81c9f4b..48e58de 100644 --- a/crawler/rec/districts.py +++ b/crawler/rec/districts.py @@ -33,4 +33,4 @@ def get_districts(): "Waltham Forest": "REGION^61232", "Wandsworth": "REGION^93977", "Westminster": "REGION^93980", -} + }