From 4c40462bb86250e11d58b6c32363158230754a81 Mon Sep 17 00:00:00 2001 From: Kadir Date: Mon, 25 Mar 2024 20:58:35 +0000 Subject: [PATCH] adding property type --- crawler/exploration.ipynb | 68 +++++++++++++++++++++++++++++++++++++-- crawler/rec/query.py | 14 ++++++++ 2 files changed, 80 insertions(+), 2 deletions(-) diff --git a/crawler/exploration.ipynb b/crawler/exploration.ipynb index df1160f..371bb0f 100644 --- a/crawler/exploration.ipynb +++ b/crawler/exploration.ipynb @@ -1605,12 +1605,76 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "015e870d-0cf0-4d07-a9ae-4e80d128b26c", "metadata": {}, "outputs": [], "source": [ - "\n" + "import enum" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "577dcd5f-07df-4d61-b837-a90db59e3ed5", + "metadata": {}, + "outputs": [], + "source": [ + "class PropertyType(enum.Enum):\n", + " def __str__(self):\n", + " return str(self.value)\n", + " \n", + " BUNGALOW= \"bungalow\"\n", + " DETACHED= \"detached\"\n", + " FLAT= \"flat\"\n", + " LAND= \"land\"\n", + " PARK_HOME= \"park-home\"\n", + " SEMI_DETACHED= \"semi-detached\"\n", + " TERRACED= \"terraced\"" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "50cc2eb3-1c3b-49b8-86a3-73dd2d151a61", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "sequence item 0: expected str instance, PropertyType found", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[13], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m x \u001b[38;5;241m=\u001b[39m [PropertyType\u001b[38;5;241m.\u001b[39mBUNGALOW,PropertyType\u001b[38;5;241m.\u001b[39mLAND]\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m,\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mTypeError\u001b[0m: sequence item 0: expected str instance, PropertyType found" + ] + } + ], + "source": [ + "x = [PropertyType.BUNGALOW,PropertyType.LAND]\n", + "','.join(x)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "09c02626-8159-4a39-8a8e-21d2fd241301", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[, ]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x" ] } ], diff --git a/crawler/rec/query.py b/crawler/rec/query.py index 2aa45b6..90d2184 100644 --- a/crawler/rec/query.py +++ b/crawler/rec/query.py @@ -1,4 +1,6 @@ # from diskcache import Cache +import enum +from typing import List import requests # from rec.db import RightmoveListing @@ -15,6 +17,15 @@ headers = { "Connection": "close", } +class PropertyType(enum.StrEnum): + BUNGALOW= "bungalow" + DETACHED= "detached" + FLAT= "flat" + LAND= "land" + PARK_HOME= "park-home" + SEMI_DETACHED= "semi-detached" + TERRACED= "terraced" + def detail_query(detail_id: int): params = { @@ -39,6 +50,7 @@ def listing_query( max_price: int, mustNewHome: bool = False, max_days_since_added: int = None, + property_type: List['PropertyType'] = [] ) -> dict: params = { "locationIdentifier": "POSTCODE^4228216", @@ -57,6 +69,8 @@ def listing_query( "apiApplication": "ANDROID", "appVersion": "3.70.0", } + if len(property_type) > 0: + params['propertyTypes'] = ','.join(property_type) if max_days_since_added: if max_days_since_added not in [1, 3, 7, 14]: raise Exception("Invalid max days. Can only be", [1, 3, 7, 14])