adding tasks and updating exploration notebook
This commit is contained in:
parent
a7a4f88a39
commit
6a43a7f485
2 changed files with 258 additions and 31 deletions
|
|
@ -1,3 +1,7 @@
|
|||
## Extra
|
||||
|
||||
- [ ] The routing is now expensive. I could simplify it by finding the walking distance to the nearest train stations with Overpass Turbo and then building a routing map between stations.
|
||||
|
||||
- [ ] Partition the query further, as each listing query can only grab 1000 entries at most. If the query is too broad, it will fail afterwards.
|
||||
|
||||
- District: City of London, totalAvailableResults: 60
|
||||
|
|
@ -1615,15 +1615,12 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 17,
|
||||
"id": "577dcd5f-07df-4d61-b837-a90db59e3ed5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class PropertyType(enum.Enum):\n",
|
||||
" def __str__(self):\n",
|
||||
" return str(self.value)\n",
|
||||
" \n",
|
||||
"class PropertyType(enum.StrEnum): \n",
|
||||
" BUNGALOW= \"bungalow\"\n",
|
||||
" DETACHED= \"detached\"\n",
|
||||
" FLAT= \"flat\"\n",
|
||||
|
|
@ -1635,47 +1632,273 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 19,
|
||||
"id": "50cc2eb3-1c3b-49b8-86a3-73dd2d151a61",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "TypeError",
|
||||
"evalue": "sequence item 0: expected str instance, PropertyType found",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[0;32mIn[13], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m x \u001b[38;5;241m=\u001b[39m [PropertyType\u001b[38;5;241m.\u001b[39mBUNGALOW,PropertyType\u001b[38;5;241m.\u001b[39mLAND]\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m,\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"\u001b[0;31mTypeError\u001b[0m: sequence item 0: expected str instance, PropertyType found"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"x = [PropertyType.BUNGALOW,PropertyType.LAND]\n",
|
||||
"','.join(x)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "09c02626-8159-4a39-8a8e-21d2fd241301",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[<PropertyType.BUNGALOW: 'bungalow'>, <PropertyType.LAND: 'land'>]"
|
||||
"'bungalow,park-home'"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"x = [PropertyType.BUNGALOW,PropertyType.PARK_HOME]\n",
|
||||
"','.join(x)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "87ead853-8a71-4de9-98d1-f4f2673a5592",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"x"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "862e9e52-53fa-4bf9-8e31-7847481d45be",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e284aefd-c280-4d82-935c-969b022b6bbc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "04bb61d5-cba7-4739-9568-b00342c1b636",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a73fba2d-afeb-4194-8421-eff8e84a14e9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Typeahead"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "61844fe2-408d-4b89-995f-c31110a850f6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'key': 'CAMD',\n",
|
||||
" 'term': 'CAMD',\n",
|
||||
" 'typeAheadLocations': [{'displayName': 'Camden, North West London',\n",
|
||||
" 'locationIdentifier': 'REGION^85261',\n",
|
||||
" 'normalisedSearchTerm': 'CAMDEN NORTH WEST LONDON'},\n",
|
||||
" {'displayName': 'Camden (London Borough)',\n",
|
||||
" 'locationIdentifier': 'REGION^93941',\n",
|
||||
" 'normalisedSearchTerm': 'CAMDEN LONDON BOROUGH'},\n",
|
||||
" {'displayName': 'Camden Town, North West London',\n",
|
||||
" 'locationIdentifier': 'REGION^85262',\n",
|
||||
" 'normalisedSearchTerm': 'CAMDEN TOWN NORTH WEST LONDON'},\n",
|
||||
" {'displayName': 'Camden Town Station',\n",
|
||||
" 'locationIdentifier': 'STATION^1712',\n",
|
||||
" 'normalisedSearchTerm': 'CAMDEN TOWN STATION'},\n",
|
||||
" {'displayName': 'Camden Road Station',\n",
|
||||
" 'locationIdentifier': 'STATION^1709',\n",
|
||||
" 'normalisedSearchTerm': 'CAMDEN ROAD STATION'},\n",
|
||||
" {'displayName': 'Camden Town, Gosport, Hampshire',\n",
|
||||
" 'locationIdentifier': 'REGION^76577',\n",
|
||||
" 'normalisedSearchTerm': 'CAMDEN TOWN GOSPORT HAMPSHIRE'},\n",
|
||||
" {'displayName': 'Camderry, Omagh, County Tyrone, Northern Ireland',\n",
|
||||
" 'locationIdentifier': 'REGION^73327',\n",
|
||||
" 'normalisedSearchTerm': 'CAMDERRY OMAGH COUNTY TYRONE NORTHERN IRELAND'}],\n",
|
||||
" 'isComplete': True}"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import requests\n",
|
||||
"\n",
|
||||
"cookies = {\n",
|
||||
" 'permuserid': '240330LSEXEOANG04Q2VA3OZCIQ8TTSQ',\n",
|
||||
" 'TS019c0ed0': '012f990cd3494097746bc0b10b8d61bc6237319024e80701e0e8d735bd7d62a792529823c3164c771f223b0cade5ec9ae4b7fc001c',\n",
|
||||
" 'beta_optin': 'N:36:-1',\n",
|
||||
" 'RM_Register': 'C',\n",
|
||||
" 'JSESSIONID': '0BE8E261D81387C9BC530DB1A5F28955',\n",
|
||||
" 'svr': '3111',\n",
|
||||
" 'permuserid': '240330LSEXEOANG04Q2VA3OZCIQ8TTSQ',\n",
|
||||
" 'TS01ec61d1': '012f990cd3161dd68e4ed69b9d64f7d4de2356c651edef96201cc7facd11b28ef9338596a2022bc2ed7a56f09c5dee5aa1711de2d1',\n",
|
||||
" 'rmsessionid': '7bc54ce6-da97-42cf-8719-4e3e9c53e276',\n",
|
||||
" 'TS01821201': '012f990cd35255a563a541cfe06e4a774c129628165af71838ccdb7a17919672962514804459a0d9d9a90fe7b8feeec66145e30b98',\n",
|
||||
" 'TS01826437': '012f990cd3161dd68e4ed69b9d64f7d4de2356c651edef96201cc7facd11b28ef9338596a2022bc2ed7a56f09c5dee5aa1711de2d1',\n",
|
||||
" 'TPCmaxPrice': '800000',\n",
|
||||
" 'TS01a07bd2': '012f990cd3161dd68e4ed69b9d64f7d4de2356c651edef96201cc7facd11b28ef9338596a2022bc2ed7a56f09c5dee5aa1711de2d1',\n",
|
||||
" 'TPCminPrice': '700000',\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"headers = {\n",
|
||||
" 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:123.0) Gecko/20100101 Firefox/123.0',\n",
|
||||
" 'Accept': 'application/json, text/javascript',\n",
|
||||
" 'Accept-Language': 'en-GB,en;q=0.5',\n",
|
||||
" # 'Accept-Encoding': 'gzip, deflate, br',\n",
|
||||
" 'Referer': 'https://www.rightmove.co.uk/property-for-sale/find.html?locationIdentifier=REGION%5E87515&maxBedrooms=3&minBedrooms=1&maxPrice=800000&minPrice=750001&propertyTypes=&includeSSTC=false&mustHave=&dontShow=retirement%2CsharedOwnership&furnishTypes=&keywords=',\n",
|
||||
" 'X-Correlation-Text-Val': 'source=search',\n",
|
||||
" 'Content-Type': 'application/x-www-form-urlencoded',\n",
|
||||
" 'traceparent': '00-d3020142d839bf1ef2b172fa596acea8-605693e1c4c5cbb5-00',\n",
|
||||
" 'DNT': '1',\n",
|
||||
" 'Sec-GPC': '1',\n",
|
||||
" 'Connection': 'keep-alive',\n",
|
||||
" # 'Cookie': 'permuserid=240330LSEXEOANG04Q2VA3OZCIQ8TTSQ; TS019c0ed0=012f990cd3494097746bc0b10b8d61bc6237319024e80701e0e8d735bd7d62a792529823c3164c771f223b0cade5ec9ae4b7fc001c; beta_optin=N:36:-1; RM_Register=C; JSESSIONID=0BE8E261D81387C9BC530DB1A5F28955; svr=3111; permuserid=240330LSEXEOANG04Q2VA3OZCIQ8TTSQ; TS01ec61d1=012f990cd3161dd68e4ed69b9d64f7d4de2356c651edef96201cc7facd11b28ef9338596a2022bc2ed7a56f09c5dee5aa1711de2d1; rmsessionid=7bc54ce6-da97-42cf-8719-4e3e9c53e276; TS01821201=012f990cd35255a563a541cfe06e4a774c129628165af71838ccdb7a17919672962514804459a0d9d9a90fe7b8feeec66145e30b98; TS01826437=012f990cd3161dd68e4ed69b9d64f7d4de2356c651edef96201cc7facd11b28ef9338596a2022bc2ed7a56f09c5dee5aa1711de2d1; TPCmaxPrice=800000; TS01a07bd2=012f990cd3161dd68e4ed69b9d64f7d4de2356c651edef96201cc7facd11b28ef9338596a2022bc2ed7a56f09c5dee5aa1711de2d1; TPCminPrice=700000',\n",
|
||||
" 'Sec-Fetch-Dest': 'empty',\n",
|
||||
" 'Sec-Fetch-Mode': 'cors',\n",
|
||||
" 'Sec-Fetch-Site': 'same-origin',\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"response = requests.get('https://www.rightmove.co.uk/typeAhead/uknostreet/CA/MD/EN/', cookies=cookies, headers=headers)\n",
|
||||
"response.json()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"id": "eba907d0-680d-4374-a56e-95bd4abf0053",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import requests\n",
|
||||
"from textwrap import wrap\n",
|
||||
"\n",
|
||||
"def query_loc_identifier(name: str)->str:\n",
|
||||
" \"\"\"\n",
|
||||
" \n",
|
||||
" \"\"\"\n",
|
||||
" name = name.upper()\n",
|
||||
" name = '/'.join(wrap(name,2))\n",
|
||||
" \n",
|
||||
" headers = {\n",
|
||||
" 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:123.0) Gecko/20100101 Firefox/123.0',\n",
|
||||
" }\n",
|
||||
" \n",
|
||||
" response = requests.get(f'https://www.rightmove.co.uk/typeAhead/uknostreet/{name}', headers=headers)\n",
|
||||
" return response.json()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d185e013-9beb-4e57-9d8b-a830e14339c3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cabe5d3f-ad0f-49c2-9fbf-686539a05bd1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"id": "ba500fe6-fb18-466e-a697-403d28181674",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 46,
|
||||
"id": "56c49b50-ff31-4785-9088-45ff5a39545e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'Greenwich': 'REGION^61226', 'Hillingdon': 'REGION^93959', 'Ealing': 'REGION^93947', 'Richmond upon Thames': 'REGION^61415', 'Sutton': 'REGION^93974', 'Wandsworth': 'REGION^93977', 'Camden': 'REGION^93941', 'Enfield': 'REGION^93950', 'Croydon': 'REGION^93944', 'Hackney': 'REGION^93953', 'Kingston upon Thames': 'REGION^93968', 'Kensington and Chelsea': 'REGION^61229', 'Bromley': 'REGION^93938', 'Brent': 'REGION^93935', 'Waltham Forest': 'REGION^61232', 'Southwark': 'REGION^61518', 'Harrow': 'REGION^93956', 'Lewisham': 'REGION^61413', 'Barnet': 'REGION^93929', 'Islington': 'REGION^93965', 'Haringey': 'REGION^61227', 'Lambeth': 'REGION^93971', 'Westminster': '', 'Tower Hamlets': 'REGION^61417', 'Havering': 'REGION^61228', 'Barking and Dagenham': 'REGION^61400', 'Hammersmith and Fulham': 'REGION^61407', 'Bexley': 'REGION^93932', 'Redbridge': 'REGION^61537', 'Newham': 'REGION^61231', 'Merton': 'REGION^61414', 'Hounslow': 'REGION^93962'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"{\n",
|
||||
" \"City of London\": \"REGION^61224\",\n",
|
||||
" \"Greenwich\": \"REGION^61226\",\n",
|
||||
" \"Hillingdon\": \"REGION^93959\",\n",
|
||||
" \"Ealing\": \"REGION^93947\",\n",
|
||||
" \"Richmond upon Thames\": \"REGION^61415\",\n",
|
||||
" \"Sutton\": \"REGION^93974\",\n",
|
||||
" \"Wandsworth\": \"REGION^93977\",\n",
|
||||
" \"Camden\": \"REGION^93941\",\n",
|
||||
" \"Enfield\": \"REGION^93950\",\n",
|
||||
" \"Croydon\": \"REGION^93944\",\n",
|
||||
" \"Hackney\": \"REGION^93953\",\n",
|
||||
" \"Kingston upon Thames\": \"REGION^93968\",\n",
|
||||
" \"Kensington and Chelsea\": \"REGION^61229\",\n",
|
||||
" \"Bromley\": \"REGION^93938\",\n",
|
||||
" \"Brent\": \"REGION^93935\",\n",
|
||||
" \"Waltham Forest\": \"REGION^61232\",\n",
|
||||
" \"Southwark\": \"REGION^61518\",\n",
|
||||
" \"Harrow\": \"REGION^93956\",\n",
|
||||
" \"Lewisham\": \"REGION^61413\",\n",
|
||||
" \"Barnet\": \"REGION^93929\",\n",
|
||||
" \"Islington\": \"REGION^93965\",\n",
|
||||
" \"Haringey\": \"REGION^61227\",\n",
|
||||
" \"Lambeth\": \"REGION^93971\",\n",
|
||||
" \"Westminster\": \"REGION^93980\",\n",
|
||||
" \"Tower Hamlets\": \"REGION^61417\",\n",
|
||||
" \"Havering\": \"REGION^61228\",\n",
|
||||
" \"Barking and Dagenham\": \"REGION^61400\",\n",
|
||||
" \"Hammersmith and Fulham\": \"REGION^61407\",\n",
|
||||
" \"Bexley\": \"REGION^93932\",\n",
|
||||
" \"Redbridge\": \"REGION^61537\",\n",
|
||||
" \"Newham\": \"REGION^61231\",\n",
|
||||
" \"Merton\": \"REGION^61414\",\n",
|
||||
" \"Hounslow\": \"REGION^93962\",\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"bor_to_locid = {}\n",
|
||||
"\n",
|
||||
"for borough in boroughs:\n",
|
||||
" bor_to_locid[borough] = ''\n",
|
||||
" d = query_loc_identifier(borough)\n",
|
||||
" locs = d['typeAheadLocations']\n",
|
||||
" filtered = [l for l in locs if 'Borough' in l['displayName']]\n",
|
||||
" if len(filtered)>1:\n",
|
||||
" print(f\"{borough} has more entries: {len(filtered)}!\")\n",
|
||||
" if filtered:\n",
|
||||
" bor_to_locid[borough] = filtered[0]['locationIdentifier']\n",
|
||||
"\n",
|
||||
"Westminster (City of)\n",
|
||||
"print(bor_to_locid)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "157e20a2-d137-45b8-802b-948fa8e04ba3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue