{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "38e8690a-f6f7-4e14-a657-f20605477afd", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/kadir/code/realestate/crawler/venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } ], "source": [ "from data_access import Listing\n", "import pandas as pd" ] }, { "cell_type": "markdown", "id": "cfe2ab03-3204-4fd8-b76a-a734f6b87d75", "metadata": {}, "source": [ "### Fetch previous decisions" ] }, { "cell_type": "code", "execution_count": 2, "id": "db55b615-698c-4f5d-881a-ea1d3b6d6205", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(93, 2)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
identifierdecision
2047145699277n
1442144642851n
1839145394765n
1853145418669removed
930143205230n
\n", "
" ], "text/plain": [ " identifier decision\n", "2047 145699277 n\n", "1442 144642851 n\n", "1839 145394765 n\n", "1853 145418669 removed\n", "930 143205230 n" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "decisions = pd.read_clipboard()\n", "decisions = decisions.loc[decisions.decision.notna(), ['identifier', 'decision']]\n", "print(decisions.shape)\n", "decisions.head()\n" ] }, { "cell_type": "code", "execution_count": 3, "id": "86224a20-53e1-403c-8d9f-71b9a9df750c", "metadata": {}, "outputs": [], "source": [ "\"\"\"\n", "output:\n", "{145699277: 'n',\n", " 144642851: 'n',\n", " 145394765: 'n',\n", " 145418669: 'removed',\n", " 143205230: 'n',\n", " 140628560: 'eigentlich geil',\n", " ...\n", "}\n", "\"\"\"\n", "decisions = decisions.set_index('identifier').decision.to_dict()" ] }, { "cell_type": "code", "execution_count": null, "id": "ec257220-f170-41b8-9f9d-b8ef61512acf", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 4, "id": "6dbd25bd-802d-4953-83c3-f01640174353", "metadata": {}, "outputs": [], "source": [ "# Use if we want to skip at the bottom\n", "# decisions = {}" ] }, { "cell_type": "markdown", "id": "7c1ee5eb-1000-4ced-983c-df47fb6ceae8", "metadata": {}, "source": [ "### Get all data prepped for sheets" ] }, { "cell_type": "code", "execution_count": null, "id": "f20bddee-1e7c-4c46-a17a-c7bb6c13f30c", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 5, "id": "b1101088-9613-465f-81fd-79801e0202b8", "metadata": {}, "outputs": [], "source": [ "ls = Listing.get_all_listings()" ] }, { "cell_type": "code", "execution_count": 6, "id": "63e61601-7e3f-4d58-89f6-1794e4868cc3", "metadata": {}, "outputs": [], "source": [ "ds = [l.dict_nicely() for l in ls]" ] }, { "cell_type": "code", "execution_count": 7, "id": "1c222721-f426-42c0-9ac5-badc1f7a2034", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
identifiersqm_ocrpriceprice_per_sqmurlbedroomstravel_time_fastesttravel_time_secondlease_leftdevelopmentdecision
010050685158.4525000.08989.726027https://www.rightmove.co.uk/properties/1005068512{'duration': 1948, 'distance': 10927, 'duratio...{'duration': 1948, 'distance': 10927, 'duratio...NaNFalseNone
1100938761NaN390000.0NaNhttps://www.rightmove.co.uk/properties/1009387611NoneNone996.0FalseNone
210181717953.2495000.09304.511278https://www.rightmove.co.uk/properties/1018171791{'duration': 2702, 'distance': 8637, 'duration...{'duration': 3333, 'distance': 10013, 'duratio...91.0FalseNone
310193966056.5238000.04212.389381https://www.rightmove.co.uk/properties/1019396602{'duration': 2262, 'distance': 13512, 'duratio...{'duration': 2322, 'distance': 13491, 'duratio...0.0FalseNone
4102103157NaN425000.0NaNhttps://www.rightmove.co.uk/properties/1021031571NoneNoneNaNFalseNone
....................................
34537970234438.3699999.084337.228916https://www.rightmove.co.uk/properties/970234431{'duration': 1704, 'distance': 8729, 'duration...{'duration': 1713, 'distance': 6755, 'duration...993.0TrueNone
345389712423753.4300000.05617.977528https://www.rightmove.co.uk/properties/971242371NoneNoneNaNFalseNone
345399733568048.0315000.06562.500000https://www.rightmove.co.uk/properties/973356802NoneNoneNaNFalseNone
3454097522346NaN400000.0NaNhttps://www.rightmove.co.uk/properties/975223462NoneNoneNaNFalseNone
3454198352914NaN399950.0NaNhttps://www.rightmove.co.uk/properties/983529142NoneNone125.0TrueNone
\n", "

34542 rows × 11 columns

\n", "
" ], "text/plain": [ " identifier sqm_ocr price price_per_sqm \\\n", "0 100506851 58.4 525000.0 8989.726027 \n", "1 100938761 NaN 390000.0 NaN \n", "2 101817179 53.2 495000.0 9304.511278 \n", "3 101939660 56.5 238000.0 4212.389381 \n", "4 102103157 NaN 425000.0 NaN \n", "... ... ... ... ... \n", "34537 97023443 8.3 699999.0 84337.228916 \n", "34538 97124237 53.4 300000.0 5617.977528 \n", "34539 97335680 48.0 315000.0 6562.500000 \n", "34540 97522346 NaN 400000.0 NaN \n", "34541 98352914 NaN 399950.0 NaN \n", "\n", " url bedrooms \\\n", "0 https://www.rightmove.co.uk/properties/100506851 2 \n", "1 https://www.rightmove.co.uk/properties/100938761 1 \n", "2 https://www.rightmove.co.uk/properties/101817179 1 \n", "3 https://www.rightmove.co.uk/properties/101939660 2 \n", "4 https://www.rightmove.co.uk/properties/102103157 1 \n", "... ... ... \n", "34537 https://www.rightmove.co.uk/properties/97023443 1 \n", "34538 https://www.rightmove.co.uk/properties/97124237 1 \n", "34539 https://www.rightmove.co.uk/properties/97335680 2 \n", "34540 https://www.rightmove.co.uk/properties/97522346 2 \n", "34541 https://www.rightmove.co.uk/properties/98352914 2 \n", "\n", " travel_time_fastest \\\n", "0 {'duration': 1948, 'distance': 10927, 'duratio... \n", "1 None \n", "2 {'duration': 2702, 'distance': 8637, 'duration... \n", "3 {'duration': 2262, 'distance': 13512, 'duratio... \n", "4 None \n", "... ... \n", "34537 {'duration': 1704, 'distance': 8729, 'duration... \n", "34538 None \n", "34539 None \n", "34540 None \n", "34541 None \n", "\n", " travel_time_second lease_left \\\n", "0 {'duration': 1948, 'distance': 10927, 'duratio... NaN \n", "1 None 996.0 \n", "2 {'duration': 3333, 'distance': 10013, 'duratio... 91.0 \n", "3 {'duration': 2322, 'distance': 13491, 'duratio... 0.0 \n", "4 None NaN \n", "... ... ... \n", "34537 {'duration': 1713, 'distance': 6755, 'duration... 993.0 \n", "34538 None NaN \n", "34539 None NaN \n", "34540 None NaN \n", "34541 None 125.0 \n", "\n", " development decision \n", "0 False None \n", "1 False None \n", "2 False None \n", "3 False None \n", "4 False None \n", "... ... ... \n", "34537 True None \n", "34538 False None \n", "34539 False None \n", "34540 False None \n", "34541 True None \n", "\n", "[34542 rows x 11 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame(ds)\n", "df.loc[:, 'decision'] = df.identifier.apply(lambda x: decisions.get(x))\n", "df" ] }, { "cell_type": "code", "execution_count": 8, "id": "d80d9911-9a6d-4608-a6da-11dc864ee32b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(34542, 11)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape" ] }, { "cell_type": "code", "execution_count": 9, "id": "7b37ad6b-9b0a-444e-b8c3-6fe4e43e42cb", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
identifiersqm_ocrpriceprice_per_sqmurlbedroomslease_leftdevelopmentdecisiona_durationa_initial_walk_durationa_duration_per_transita_number_of_transit_stopsb_durationb_initial_walk_durationb_duration_per_transitb_number_of_transit_stopsmin_duration
010050685158.4525000.08989.726027https://www.rightmove.co.uk/properties/1005068512NaNFalseNone1948.0161.0{'WALK': 481, 'TRANSIT': 1200}2.01948.0161.0{'WALK': 481, 'TRANSIT': 1200}2.032.0
1100938761NaN390000.0NaNhttps://www.rightmove.co.uk/properties/1009387611996.0FalseNoneNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " identifier sqm_ocr price price_per_sqm \\\n", "0 100506851 58.4 525000.0 8989.726027 \n", "1 100938761 NaN 390000.0 NaN \n", "\n", " url bedrooms lease_left \\\n", "0 https://www.rightmove.co.uk/properties/100506851 2 NaN \n", "1 https://www.rightmove.co.uk/properties/100938761 1 996.0 \n", "\n", " development decision a_duration a_initial_walk_duration \\\n", "0 False None 1948.0 161.0 \n", "1 False None NaN NaN \n", "\n", " a_duration_per_transit a_number_of_transit_stops b_duration \\\n", "0 {'WALK': 481, 'TRANSIT': 1200} 2.0 1948.0 \n", "1 NaN NaN NaN \n", "\n", " b_initial_walk_duration b_duration_per_transit \\\n", "0 161.0 {'WALK': 481, 'TRANSIT': 1200} \n", "1 NaN NaN \n", "\n", " b_number_of_transit_stops min_duration \n", "0 2.0 32.0 \n", "1 NaN NaN " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# dropcolumns = ['distance_per_transit', 'duration_static', 'distance']\n", "# s1 = df['travel_time_fastest'].apply(pd.Series).drop(dropcolumns, axis=1)\n", "# s1.columns = ['a_' + c for c in s1.columns]\n", "\n", "# s2 = df['travel_time_second'].apply(pd.Series).drop(dropcolumns, axis=1)\n", "# s2.columns = ['b_' + c for c in s2.columns]\n", "\n", "# df2 = pd.concat([df.drop(['travel_time_fastest', 'travel_time_second'], axis=1), s1, s2], axis=1)\n", "# df2.loc[:, 'min_duration'] = (df2.loc[:, ['a_duration', 'b_duration']].min(axis=1) / 60).round()\n", "# df2.head(2)" ] }, { "cell_type": "code", "execution_count": 10, "id": "8c75aaa6-6113-482f-809b-11e405510184", "metadata": {}, "outputs": [], "source": [ "# df2.to_clipboard()" ] }, { "cell_type": "code", "execution_count": 13, "id": "79f99692-91e8-4915-9b57-7b3a1efd7d3a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
identifiersqm_ocrpriceprice_per_sqmurlbedroomslease_leftdevelopmentdecisiondurationinitial_walk_durationduration_per_transitnumber_of_transit_stops
010050685158.4525000.08989.726027https://www.rightmove.co.uk/properties/1005068512NaNFalseNone32.0161.0{'WALK': 481, 'TRANSIT': 1200}2.0
1100938761NaN390000.0NaNhttps://www.rightmove.co.uk/properties/1009387611996.0FalseNoneNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " identifier sqm_ocr price price_per_sqm \\\n", "0 100506851 58.4 525000.0 8989.726027 \n", "1 100938761 NaN 390000.0 NaN \n", "\n", " url bedrooms lease_left \\\n", "0 https://www.rightmove.co.uk/properties/100506851 2 NaN \n", "1 https://www.rightmove.co.uk/properties/100938761 1 996.0 \n", "\n", " development decision duration initial_walk_duration \\\n", "0 False None 32.0 161.0 \n", "1 False None NaN NaN \n", "\n", " duration_per_transit number_of_transit_stops \n", "0 {'WALK': 481, 'TRANSIT': 1200} 2.0 \n", "1 NaN NaN " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dropcolumns = ['distance_per_transit', 'duration_static', 'distance']\n", "s1 = df['travel_time_fastest'].apply(pd.Series).drop(dropcolumns, axis=1)\n", "\n", "df3 = pd.concat([df.drop(['travel_time_fastest', 'travel_time_second'], axis=1), s1], axis=1)\n", "df3.loc[:, 'duration'] = (df3.loc[:, ['duration']].min(axis=1) / 60).round()\n", "df3.to_clipboard()\n", "df3.head(2)" ] }, { "cell_type": "code", "execution_count": 20, "id": "abcbde40-7432-4449-957a-79ce2ca126eb", "metadata": {}, "outputs": [], "source": [ "df3[df3.duration < 45].to_clipboard()\n" ] }, { "cell_type": "markdown", "id": "98f8e950-2a3b-4856-aa62-3bc758e2fd42", "metadata": {}, "source": [ "# Find out the proper radius we want to use" ] }, { "cell_type": "code", "execution_count": 50, "id": "04bb61d5-cba7-4739-9568-b00342c1b636", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Filtered listings from 32271 to 15804\n" ] } ], "source": [ "from data_access import Listing\n", "from geopy.distance import geodesic\n", "\n", "listings = Listing.get_all_listings()\n", "BROCK_STREET_LAT_LONG = 51.52570434674584, -0.13956495005056113\n", "\n", "# reduce listings to everything within 7 miles\n", "filtered_listings = []\n", "for listing in listings:\n", " miles = geodesic(BROCK_STREET_LAT_LONG, (listing.latitude, listing.longitude)).miles\n", " if miles <= 7:\n", " filtered_listings.append(listing)\n", "\n", "print(f\"Filtered listings from {len(listings)} to {len(filtered_listings)}\")" ] }, { "cell_type": "markdown", "id": "a73fba2d-afeb-4194-8421-eff8e84a14e9", "metadata": {}, "source": [ "# Typeahead / fetch all boroughs" ] }, { "cell_type": "code", "execution_count": 20, "id": "61844fe2-408d-4b89-995f-c31110a850f6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'key': 'CAMD',\n", " 'term': 'CAMD',\n", " 'typeAheadLocations': [{'displayName': 'Camden, North West London',\n", " 'locationIdentifier': 'REGION^85261',\n", " 'normalisedSearchTerm': 'CAMDEN NORTH WEST LONDON'},\n", " {'displayName': 'Camden (London Borough)',\n", " 'locationIdentifier': 'REGION^93941',\n", " 'normalisedSearchTerm': 'CAMDEN LONDON BOROUGH'},\n", " {'displayName': 'Camden Town, North West London',\n", " 'locationIdentifier': 'REGION^85262',\n", " 'normalisedSearchTerm': 'CAMDEN TOWN NORTH WEST LONDON'},\n", " {'displayName': 'Camden Town Station',\n", " 'locationIdentifier': 'STATION^1712',\n", " 'normalisedSearchTerm': 'CAMDEN TOWN STATION'},\n", " {'displayName': 'Camden Road Station',\n", " 'locationIdentifier': 'STATION^1709',\n", " 'normalisedSearchTerm': 'CAMDEN ROAD STATION'},\n", " {'displayName': 'Camden Town, Gosport, Hampshire',\n", " 'locationIdentifier': 'REGION^76577',\n", " 'normalisedSearchTerm': 'CAMDEN TOWN GOSPORT HAMPSHIRE'},\n", " {'displayName': 'Camderry, Omagh, County Tyrone, Northern Ireland',\n", " 'locationIdentifier': 'REGION^73327',\n", " 'normalisedSearchTerm': 'CAMDERRY OMAGH COUNTY TYRONE NORTHERN IRELAND'}],\n", " 'isComplete': True}" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import requests\n", "\n", "cookies = {\n", " 'permuserid': '240330LSEXEOANG04Q2VA3OZCIQ8TTSQ',\n", " 'TS019c0ed0': '012f990cd3494097746bc0b10b8d61bc6237319024e80701e0e8d735bd7d62a792529823c3164c771f223b0cade5ec9ae4b7fc001c',\n", " 'beta_optin': 'N:36:-1',\n", " 'RM_Register': 'C',\n", " 'JSESSIONID': '0BE8E261D81387C9BC530DB1A5F28955',\n", " 'svr': '3111',\n", " 'permuserid': '240330LSEXEOANG04Q2VA3OZCIQ8TTSQ',\n", " 'TS01ec61d1': '012f990cd3161dd68e4ed69b9d64f7d4de2356c651edef96201cc7facd11b28ef9338596a2022bc2ed7a56f09c5dee5aa1711de2d1',\n", " 'rmsessionid': '7bc54ce6-da97-42cf-8719-4e3e9c53e276',\n", " 'TS01821201': '012f990cd35255a563a541cfe06e4a774c129628165af71838ccdb7a17919672962514804459a0d9d9a90fe7b8feeec66145e30b98',\n", " 'TS01826437': '012f990cd3161dd68e4ed69b9d64f7d4de2356c651edef96201cc7facd11b28ef9338596a2022bc2ed7a56f09c5dee5aa1711de2d1',\n", " 'TPCmaxPrice': '800000',\n", " 'TS01a07bd2': '012f990cd3161dd68e4ed69b9d64f7d4de2356c651edef96201cc7facd11b28ef9338596a2022bc2ed7a56f09c5dee5aa1711de2d1',\n", " 'TPCminPrice': '700000',\n", "}\n", "\n", "headers = {\n", " 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:123.0) Gecko/20100101 Firefox/123.0',\n", " 'Accept': 'application/json, text/javascript',\n", " 'Accept-Language': 'en-GB,en;q=0.5',\n", " # 'Accept-Encoding': 'gzip, deflate, br',\n", " 'Referer': 'https://www.rightmove.co.uk/property-for-sale/find.html?locationIdentifier=REGION%5E87515&maxBedrooms=3&minBedrooms=1&maxPrice=800000&minPrice=750001&propertyTypes=&includeSSTC=false&mustHave=&dontShow=retirement%2CsharedOwnership&furnishTypes=&keywords=',\n", " 'X-Correlation-Text-Val': 'source=search',\n", " 'Content-Type': 'application/x-www-form-urlencoded',\n", " 'traceparent': '00-d3020142d839bf1ef2b172fa596acea8-605693e1c4c5cbb5-00',\n", " 'DNT': '1',\n", " 'Sec-GPC': '1',\n", " 'Connection': 'keep-alive',\n", " # 'Cookie': 'permuserid=240330LSEXEOANG04Q2VA3OZCIQ8TTSQ; TS019c0ed0=012f990cd3494097746bc0b10b8d61bc6237319024e80701e0e8d735bd7d62a792529823c3164c771f223b0cade5ec9ae4b7fc001c; beta_optin=N:36:-1; RM_Register=C; JSESSIONID=0BE8E261D81387C9BC530DB1A5F28955; svr=3111; permuserid=240330LSEXEOANG04Q2VA3OZCIQ8TTSQ; TS01ec61d1=012f990cd3161dd68e4ed69b9d64f7d4de2356c651edef96201cc7facd11b28ef9338596a2022bc2ed7a56f09c5dee5aa1711de2d1; rmsessionid=7bc54ce6-da97-42cf-8719-4e3e9c53e276; TS01821201=012f990cd35255a563a541cfe06e4a774c129628165af71838ccdb7a17919672962514804459a0d9d9a90fe7b8feeec66145e30b98; TS01826437=012f990cd3161dd68e4ed69b9d64f7d4de2356c651edef96201cc7facd11b28ef9338596a2022bc2ed7a56f09c5dee5aa1711de2d1; TPCmaxPrice=800000; TS01a07bd2=012f990cd3161dd68e4ed69b9d64f7d4de2356c651edef96201cc7facd11b28ef9338596a2022bc2ed7a56f09c5dee5aa1711de2d1; TPCminPrice=700000',\n", " 'Sec-Fetch-Dest': 'empty',\n", " 'Sec-Fetch-Mode': 'cors',\n", " 'Sec-Fetch-Site': 'same-origin',\n", "}\n", "\n", "response = requests.get('https://www.rightmove.co.uk/typeAhead/uknostreet/CA/MD/EN/', cookies=cookies, headers=headers)\n", "response.json()" ] }, { "cell_type": "code", "execution_count": 39, "id": "eba907d0-680d-4374-a56e-95bd4abf0053", "metadata": {}, "outputs": [], "source": [ "import requests\n", "from textwrap import wrap\n", "\n", "def query_loc_identifier(name: str)->str:\n", " \"\"\"\n", " \n", " \"\"\"\n", " name = name.upper()\n", " name = '/'.join(wrap(name,2))\n", " \n", " headers = {\n", " 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:123.0) Gecko/20100101 Firefox/123.0',\n", " }\n", " \n", " response = requests.get(f'https://www.rightmove.co.uk/typeAhead/uknostreet/{name}', headers=headers)\n", " return response.json()" ] }, { "cell_type": "code", "execution_count": null, "id": "d185e013-9beb-4e57-9d8b-a830e14339c3", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "cabe5d3f-ad0f-49c2-9fbf-686539a05bd1", "metadata": {}, "outputs": [], "source": [ "\n" ] }, { "cell_type": "code", "execution_count": 30, "id": "ba500fe6-fb18-466e-a697-403d28181674", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 46, "id": "56c49b50-ff31-4785-9088-45ff5a39545e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'Greenwich': 'REGION^61226', 'Hillingdon': 'REGION^93959', 'Ealing': 'REGION^93947', 'Richmond upon Thames': 'REGION^61415', 'Sutton': 'REGION^93974', 'Wandsworth': 'REGION^93977', 'Camden': 'REGION^93941', 'Enfield': 'REGION^93950', 'Croydon': 'REGION^93944', 'Hackney': 'REGION^93953', 'Kingston upon Thames': 'REGION^93968', 'Kensington and Chelsea': 'REGION^61229', 'Bromley': 'REGION^93938', 'Brent': 'REGION^93935', 'Waltham Forest': 'REGION^61232', 'Southwark': 'REGION^61518', 'Harrow': 'REGION^93956', 'Lewisham': 'REGION^61413', 'Barnet': 'REGION^93929', 'Islington': 'REGION^93965', 'Haringey': 'REGION^61227', 'Lambeth': 'REGION^93971', 'Westminster': '', 'Tower Hamlets': 'REGION^61417', 'Havering': 'REGION^61228', 'Barking and Dagenham': 'REGION^61400', 'Hammersmith and Fulham': 'REGION^61407', 'Bexley': 'REGION^93932', 'Redbridge': 'REGION^61537', 'Newham': 'REGION^61231', 'Merton': 'REGION^61414', 'Hounslow': 'REGION^93962'}\n" ] } ], "source": [ "{\n", " \"City of London\": \"REGION^61224\",\n", " \"Greenwich\": \"REGION^61226\",\n", " \"Hillingdon\": \"REGION^93959\",\n", " \"Ealing\": \"REGION^93947\",\n", " \"Richmond upon Thames\": \"REGION^61415\",\n", " \"Sutton\": \"REGION^93974\",\n", " \"Wandsworth\": \"REGION^93977\",\n", " \"Camden\": \"REGION^93941\",\n", " \"Enfield\": \"REGION^93950\",\n", " \"Croydon\": \"REGION^93944\",\n", " \"Hackney\": \"REGION^93953\",\n", " \"Kingston upon Thames\": \"REGION^93968\",\n", " \"Kensington and Chelsea\": \"REGION^61229\",\n", " \"Bromley\": \"REGION^93938\",\n", " \"Brent\": \"REGION^93935\",\n", " \"Waltham Forest\": \"REGION^61232\",\n", " \"Southwark\": \"REGION^61518\",\n", " \"Harrow\": \"REGION^93956\",\n", " \"Lewisham\": \"REGION^61413\",\n", " \"Barnet\": \"REGION^93929\",\n", " \"Islington\": \"REGION^93965\",\n", " \"Haringey\": \"REGION^61227\",\n", " \"Lambeth\": \"REGION^93971\",\n", " \"Westminster\": \"REGION^93980\",\n", " \"Tower Hamlets\": \"REGION^61417\",\n", " \"Havering\": \"REGION^61228\",\n", " \"Barking and Dagenham\": \"REGION^61400\",\n", " \"Hammersmith and Fulham\": \"REGION^61407\",\n", " \"Bexley\": \"REGION^93932\",\n", " \"Redbridge\": \"REGION^61537\",\n", " \"Newham\": \"REGION^61231\",\n", " \"Merton\": \"REGION^61414\",\n", " \"Hounslow\": \"REGION^93962\",\n", "}\n", "\n", "bor_to_locid = {}\n", "\n", "for borough in boroughs:\n", " bor_to_locid[borough] = ''\n", " d = query_loc_identifier(borough)\n", " locs = d['typeAheadLocations']\n", " filtered = [l for l in locs if 'Borough' in l['displayName']]\n", " if len(filtered)>1:\n", " print(f\"{borough} has more entries: {len(filtered)}!\")\n", " if filtered:\n", " bor_to_locid[borough] = filtered[0]['locationIdentifier']\n", "\n", "Westminster (City of)\n", "print(bor_to_locid)" ] }, { "cell_type": "code", "execution_count": null, "id": "157e20a2-d137-45b8-802b-948fa8e04ba3", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "386578dc-1ad5-4b8a-8905-29b0c47a6174", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "10c17fdf-f424-40cb-9d8c-9218f8d4ab53", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "88d99eb7-8c92-4817-86ce-ba0738331dba", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "6c8b4488-ae2b-41ab-9c95-e3c85f9fb77e", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "c3072907-7ad8-4618-92ab-818e392218d9", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "b20f6f16-3236-4772-b1a3-2d4a3b1925a6", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "1a7230dc-1a0f-43e2-bd15-0c85ea445733", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "aeaf84bf-8514-48c6-88ce-2c6828bdcdf2", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "c888d4e6-d192-45df-b9b6-5e2d39bca344", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 1, "id": "cd71db7f-ba11-4d5d-a183-768ed4db23ba", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/kadir/code/realestate/crawler/venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } ], "source": [ "from data_access import Listing" ] }, { "cell_type": "code", "execution_count": 2, "id": "d0ced84b-ee91-4642-b2ff-dd32d9f1e437", "metadata": {}, "outputs": [], "source": [ "l = Listing(133604363)" ] }, { "cell_type": "code", "execution_count": 3, "id": "7157f5f7-65b3-4232-bcae-26b93e5d93e6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "6395.76" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "l.serviceCharge" ] }, { "cell_type": "code", "execution_count": null, "id": "3f453f9c-bdaa-4713-8220-c504f1a436ae", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.0" } }, "nbformat": 4, "nbformat_minor": 5 }