decisions + logger

This commit is contained in:
Kadir 2025-05-12 01:01:19 +01:00
parent 962c9a2f38
commit 3f4be8b7ff
3 changed files with 217 additions and 203 deletions

File diff suppressed because one or more lines are too long

View file

@ -33,18 +33,7 @@
"execution_count": 2, "execution_count": 2,
"id": "424501ab-ecc6-42f5-b87e-b0d2871bdc74", "id": "424501ab-ecc6-42f5-b87e-b0d2871bdc74",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/wl/kx43lvyn6yv7lq988gwrkq_m0000gn/T/ipykernel_85865/3290399543.py:3: FutureWarning: The behavior of 'to_datetime' with 'unit' when parsing strings is deprecated. In a future version, strings will be parsed as datetime strings, matching the behavior without a 'unit'. To retain the old behavior, explicitly cast ints or floats to numeric type before calling to_datetime.\n",
" decisions = pd.read_json(decisions_path)\n",
"/var/folders/wl/kx43lvyn6yv7lq988gwrkq_m0000gn/T/ipykernel_85865/3290399543.py:3: FutureWarning: The behavior of 'to_datetime' with 'unit' when parsing strings is deprecated. In a future version, strings will be parsed as datetime strings, matching the behavior without a 'unit'. To retain the old behavior, explicitly cast ints or floats to numeric type before calling to_datetime.\n",
" decisions = pd.read_json(decisions_path)\n"
]
}
],
"source": [ "source": [
"# read decisions on file\n", "# read decisions on file\n",
"decisions_path = 'data/decisions.json'\n", "decisions_path = 'data/decisions.json'\n",
@ -147,7 +136,7 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"18508\n" "10574\n"
] ]
} }
], ],
@ -220,25 +209,6 @@
" <tbody>\n", " <tbody>\n",
" <tr>\n", " <tr>\n",
" <th>0</th>\n", " <th>0</th>\n",
" <td>101369066</td>\n",
" <td>NaN</td>\n",
" <td>875000.0</td>\n",
" <td>NaN</td>\n",
" <td>https://www.rightmove.co.uk/properties/101369066</td>\n",
" <td>3</td>\n",
" <td>{'duration': 2252, 'distance': 7140, 'duration...</td>\n",
" <td>{'duration': 2465, 'distance': 7502, 'duration...</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>Share of Freehold</td>\n",
" <td>12</td>\n",
" <td>None</td>\n",
" <td>0</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>105484772</td>\n", " <td>105484772</td>\n",
" <td>45.7</td>\n", " <td>45.7</td>\n",
" <td>325000.0</td>\n", " <td>325000.0</td>\n",
@ -251,13 +221,13 @@
" <td>641.53</td>\n", " <td>641.53</td>\n",
" <td>False</td>\n", " <td>False</td>\n",
" <td>Leasehold</td>\n", " <td>Leasehold</td>\n",
" <td>36</td>\n", " <td>116</td>\n",
" <td>None</td>\n", " <td>None</td>\n",
" <td>0</td>\n", " <td>0</td>\n",
" <td>None</td>\n", " <td>None</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>2</th>\n", " <th>1</th>\n",
" <td>105827126</td>\n", " <td>105827126</td>\n",
" <td>58.5</td>\n", " <td>58.5</td>\n",
" <td>950000.0</td>\n", " <td>950000.0</td>\n",
@ -270,32 +240,13 @@
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>True</td>\n", " <td>True</td>\n",
" <td>Leasehold</td>\n", " <td>Leasehold</td>\n",
" <td>2</td>\n", " <td>83</td>\n",
" <td>None</td>\n", " <td>None</td>\n",
" <td>0</td>\n", " <td>0</td>\n",
" <td>None</td>\n", " <td>None</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>3</th>\n", " <th>2</th>\n",
" <td>105836849</td>\n",
" <td>NaN</td>\n",
" <td>400000.0</td>\n",
" <td>NaN</td>\n",
" <td>https://www.rightmove.co.uk/properties/105836849</td>\n",
" <td>3</td>\n",
" <td>{'duration': 2565, 'distance': 14070, 'duratio...</td>\n",
" <td>{'duration': 2565, 'distance': 14070, 'duratio...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>Leasehold</td>\n",
" <td>393</td>\n",
" <td>None</td>\n",
" <td>20</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>108102476</td>\n", " <td>108102476</td>\n",
" <td>53.7</td>\n", " <td>53.7</td>\n",
" <td>515000.0</td>\n", " <td>515000.0</td>\n",
@ -308,7 +259,45 @@
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>False</td>\n", " <td>False</td>\n",
" <td>Leasehold</td>\n", " <td>Leasehold</td>\n",
" <td>16</td>\n", " <td>97</td>\n",
" <td>None</td>\n",
" <td>0</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>108171770</td>\n",
" <td>45.0</td>\n",
" <td>650000.0</td>\n",
" <td>14444.444444</td>\n",
" <td>https://www.rightmove.co.uk/properties/108171770</td>\n",
" <td>2</td>\n",
" <td>{'duration': 1591, 'distance': 7827, 'duration...</td>\n",
" <td>{'duration': 1591, 'distance': 7827, 'duration...</td>\n",
" <td>962.0</td>\n",
" <td>2000.00</td>\n",
" <td>False</td>\n",
" <td>Leasehold</td>\n",
" <td>261</td>\n",
" <td>None</td>\n",
" <td>0</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>109595123</td>\n",
" <td>NaN</td>\n",
" <td>1000000.0</td>\n",
" <td>NaN</td>\n",
" <td>https://www.rightmove.co.uk/properties/109595123</td>\n",
" <td>1</td>\n",
" <td>{'duration': 2463, 'distance': 9565, 'duration...</td>\n",
" <td>{'duration': 2463, 'distance': 9565, 'duration...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>True</td>\n",
" <td>Please confirm if this is a freehold or leaseh...</td>\n",
" <td>96</td>\n",
" <td>None</td>\n", " <td>None</td>\n",
" <td>0</td>\n", " <td>0</td>\n",
" <td>None</td>\n", " <td>None</td>\n",
@ -333,7 +322,45 @@
" <td>...</td>\n", " <td>...</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>18503</th>\n", " <th>10569</th>\n",
" <td>88731877</td>\n",
" <td>NaN</td>\n",
" <td>570000.0</td>\n",
" <td>NaN</td>\n",
" <td>https://www.rightmove.co.uk/properties/88731877</td>\n",
" <td>1</td>\n",
" <td>{'duration': 912, 'distance': 6329, 'duration_...</td>\n",
" <td>{'duration': 852, 'distance': 6329, 'duration_...</td>\n",
" <td>998.0</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>Leasehold</td>\n",
" <td>407</td>\n",
" <td>None</td>\n",
" <td>0</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10570</th>\n",
" <td>89825950</td>\n",
" <td>48.9</td>\n",
" <td>680000.0</td>\n",
" <td>13905.930470</td>\n",
" <td>https://www.rightmove.co.uk/properties/89825950</td>\n",
" <td>1</td>\n",
" <td>{'duration': 273, 'distance': 762, 'duration_s...</td>\n",
" <td>{'duration': 273, 'distance': 762, 'duration_s...</td>\n",
" <td>112.0</td>\n",
" <td>1700.00</td>\n",
" <td>False</td>\n",
" <td>Leasehold</td>\n",
" <td>113</td>\n",
" <td>None</td>\n",
" <td>0</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10571</th>\n",
" <td>94206080</td>\n", " <td>94206080</td>\n",
" <td>49.6</td>\n", " <td>49.6</td>\n",
" <td>899000.0</td>\n", " <td>899000.0</td>\n",
@ -346,32 +373,13 @@
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>True</td>\n", " <td>True</td>\n",
" <td>Leasehold</td>\n", " <td>Leasehold</td>\n",
" <td>256</td>\n", " <td>337</td>\n",
" <td>None</td>\n", " <td>None</td>\n",
" <td>0</td>\n", " <td>0</td>\n",
" <td>None</td>\n", " <td>None</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>18504</th>\n", " <th>10572</th>\n",
" <td>94206329</td>\n",
" <td>NaN</td>\n",
" <td>700000.0</td>\n",
" <td>NaN</td>\n",
" <td>https://www.rightmove.co.uk/properties/94206329</td>\n",
" <td>1</td>\n",
" <td>{'duration': 2172, 'distance': 12497, 'duratio...</td>\n",
" <td>{'duration': 2112, 'distance': 12497, 'duratio...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>Leasehold</td>\n",
" <td>256</td>\n",
" <td>None</td>\n",
" <td>20</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18505</th>\n",
" <td>94508306</td>\n", " <td>94508306</td>\n",
" <td>94.0</td>\n", " <td>94.0</td>\n",
" <td>1000000.0</td>\n", " <td>1000000.0</td>\n",
@ -384,13 +392,13 @@
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>False</td>\n", " <td>False</td>\n",
" <td>Leasehold</td>\n", " <td>Leasehold</td>\n",
" <td>149</td>\n", " <td>230</td>\n",
" <td>None</td>\n", " <td>None</td>\n",
" <td>0</td>\n", " <td>9</td>\n",
" <td>None</td>\n", " <td>None</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>18506</th>\n", " <th>10573</th>\n",
" <td>95975483</td>\n", " <td>95975483</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>800000.0</td>\n", " <td>800000.0</td>\n",
@ -403,115 +411,109 @@
" <td>0.00</td>\n", " <td>0.00</td>\n",
" <td>False</td>\n", " <td>False</td>\n",
" <td>Leasehold</td>\n", " <td>Leasehold</td>\n",
" <td>3</td>\n", " <td>84</td>\n",
" <td>None</td>\n", " <td>None</td>\n",
" <td>0</td>\n", " <td>0</td>\n",
" <td>None</td>\n", " <td>None</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n",
" <th>18507</th>\n",
" <td>96773996</td>\n",
" <td>70.8</td>\n",
" <td>1000000.0</td>\n",
" <td>14124.293785</td>\n",
" <td>https://www.rightmove.co.uk/properties/96773996</td>\n",
" <td>2</td>\n",
" <td>{'duration': 1608, 'distance': 8301, 'duration...</td>\n",
" <td>{'duration': 1608, 'distance': 8301, 'duration...</td>\n",
" <td>992.0</td>\n",
" <td>4716.36</td>\n",
" <td>True</td>\n",
" <td>Leasehold</td>\n",
" <td>227</td>\n",
" <td>None</td>\n",
" <td>20</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
"<p>18508 rows × 16 columns</p>\n", "<p>10574 rows × 16 columns</p>\n",
"</div>" "</div>"
], ],
"text/plain": [ "text/plain": [
" identifier sqm_ocr price price_per_sqm \\\n", " identifier sqm_ocr price price_per_sqm \\\n",
"0 101369066 NaN 875000.0 NaN \n", "0 105484772 45.7 325000.0 7111.597374 \n",
"1 105484772 45.7 325000.0 7111.597374 \n", "1 105827126 58.5 950000.0 16239.316239 \n",
"2 105827126 58.5 950000.0 16239.316239 \n", "2 108102476 53.7 515000.0 9590.316574 \n",
"3 105836849 NaN 400000.0 NaN \n", "3 108171770 45.0 650000.0 14444.444444 \n",
"4 108102476 53.7 515000.0 9590.316574 \n", "4 109595123 NaN 1000000.0 NaN \n",
"... ... ... ... ... \n", "... ... ... ... ... \n",
"18503 94206080 49.6 899000.0 18125.000000 \n", "10569 88731877 NaN 570000.0 NaN \n",
"18504 94206329 NaN 700000.0 NaN \n", "10570 89825950 48.9 680000.0 13905.930470 \n",
"18505 94508306 94.0 1000000.0 10638.297872 \n", "10571 94206080 49.6 899000.0 18125.000000 \n",
"18506 95975483 NaN 800000.0 NaN \n", "10572 94508306 94.0 1000000.0 10638.297872 \n",
"18507 96773996 70.8 1000000.0 14124.293785 \n", "10573 95975483 NaN 800000.0 NaN \n",
"\n", "\n",
" url bedrooms \\\n", " url bedrooms \\\n",
"0 https://www.rightmove.co.uk/properties/101369066 3 \n", "0 https://www.rightmove.co.uk/properties/105484772 1 \n",
"1 https://www.rightmove.co.uk/properties/105484772 1 \n", "1 https://www.rightmove.co.uk/properties/105827126 1 \n",
"2 https://www.rightmove.co.uk/properties/105827126 1 \n", "2 https://www.rightmove.co.uk/properties/108102476 1 \n",
"3 https://www.rightmove.co.uk/properties/105836849 3 \n", "3 https://www.rightmove.co.uk/properties/108171770 2 \n",
"4 https://www.rightmove.co.uk/properties/108102476 1 \n", "4 https://www.rightmove.co.uk/properties/109595123 1 \n",
"... ... ... \n", "... ... ... \n",
"18503 https://www.rightmove.co.uk/properties/94206080 1 \n", "10569 https://www.rightmove.co.uk/properties/88731877 1 \n",
"18504 https://www.rightmove.co.uk/properties/94206329 1 \n", "10570 https://www.rightmove.co.uk/properties/89825950 1 \n",
"18505 https://www.rightmove.co.uk/properties/94508306 2 \n", "10571 https://www.rightmove.co.uk/properties/94206080 1 \n",
"18506 https://www.rightmove.co.uk/properties/95975483 2 \n", "10572 https://www.rightmove.co.uk/properties/94508306 2 \n",
"18507 https://www.rightmove.co.uk/properties/96773996 2 \n", "10573 https://www.rightmove.co.uk/properties/95975483 2 \n",
"\n", "\n",
" travel_time_fastest \\\n", " travel_time_fastest \\\n",
"0 {'duration': 2252, 'distance': 7140, 'duration... \n", "0 {'duration': 1983, 'distance': 10095, 'duratio... \n",
"1 {'duration': 1983, 'distance': 10095, 'duratio... \n", "1 {'duration': 2478, 'distance': 9584, 'duration... \n",
"2 {'duration': 2478, 'distance': 9584, 'duration... \n", "2 {'duration': 1266, 'distance': 4042, 'duration... \n",
"3 {'duration': 2565, 'distance': 14070, 'duratio... \n", "3 {'duration': 1591, 'distance': 7827, 'duration... \n",
"4 {'duration': 1266, 'distance': 4042, 'duration... \n", "4 {'duration': 2463, 'distance': 9565, 'duration... \n",
"... ... \n", "... ... \n",
"18503 {'duration': 1125, 'distance': 4637, 'duration... \n", "10569 {'duration': 912, 'distance': 6329, 'duration_... \n",
"18504 {'duration': 2172, 'distance': 12497, 'duratio... \n", "10570 {'duration': 273, 'distance': 762, 'duration_s... \n",
"18505 {'duration': 1046, 'distance': 2193, 'duration... \n", "10571 {'duration': 1125, 'distance': 4637, 'duration... \n",
"18506 {'duration': 2281, 'distance': 7262, 'duration... \n", "10572 {'duration': 1046, 'distance': 2193, 'duration... \n",
"18507 {'duration': 1608, 'distance': 8301, 'duration... \n", "10573 {'duration': 2281, 'distance': 7262, 'duration... \n",
"\n", "\n",
" travel_time_second lease_left \\\n", " travel_time_second lease_left \\\n",
"0 {'duration': 2465, 'distance': 7502, 'duration... 0.0 \n", "0 {'duration': 2043, 'distance': 10083, 'duratio... 104.0 \n",
"1 {'duration': 2043, 'distance': 10083, 'duratio... 104.0 \n", "1 {'duration': 2478, 'distance': 9584, 'duration... NaN \n",
"2 {'duration': 2478, 'distance': 9584, 'duration... NaN \n", "2 {'duration': 1861, 'distance': 4548, 'duration... 104.0 \n",
"3 {'duration': 2565, 'distance': 14070, 'duratio... NaN \n", "3 {'duration': 1591, 'distance': 7827, 'duration... 962.0 \n",
"4 {'duration': 1861, 'distance': 4548, 'duration... 104.0 \n", "4 {'duration': 2463, 'distance': 9565, 'duration... NaN \n",
"... ... ... \n", "... ... ... \n",
"18503 {'duration': 1125, 'distance': 4641, 'duration... NaN \n", "10569 {'duration': 852, 'distance': 6329, 'duration_... 998.0 \n",
"18504 {'duration': 2112, 'distance': 12497, 'duratio... NaN \n", "10570 {'duration': 273, 'distance': 762, 'duration_s... 112.0 \n",
"18505 {'duration': 1046, 'distance': 2193, 'duration... 977.0 \n", "10571 {'duration': 1125, 'distance': 4641, 'duration... NaN \n",
"18506 {'duration': 2815, 'distance': 5607, 'duration... 999.0 \n", "10572 {'duration': 1046, 'distance': 2193, 'duration... 977.0 \n",
"18507 {'duration': 1608, 'distance': 8301, 'duration... 992.0 \n", "10573 {'duration': 2815, 'distance': 5607, 'duration... 999.0 \n",
"\n", "\n",
" service_charge development tenure_type updated_days status \\\n", " service_charge development \\\n",
"0 NaN False Share of Freehold 12 None \n", "0 641.53 False \n",
"1 641.53 False Leasehold 36 None \n", "1 NaN True \n",
"2 NaN True Leasehold 2 None \n", "2 NaN False \n",
"3 NaN False Leasehold 393 None \n", "3 2000.00 False \n",
"4 NaN False Leasehold 16 None \n", "4 NaN True \n",
"... ... ... ... ... ... \n", "... ... ... \n",
"18503 NaN True Leasehold 256 None \n", "10569 NaN False \n",
"18504 NaN False Leasehold 256 None \n", "10570 1700.00 False \n",
"18505 NaN False Leasehold 149 None \n", "10571 NaN True \n",
"18506 0.00 False Leasehold 3 None \n", "10572 NaN False \n",
"18507 4716.36 True Leasehold 227 None \n", "10573 0.00 False \n",
"\n",
" tenure_type updated_days status \\\n",
"0 Leasehold 116 None \n",
"1 Leasehold 83 None \n",
"2 Leasehold 97 None \n",
"3 Leasehold 261 None \n",
"4 Please confirm if this is a freehold or leaseh... 96 None \n",
"... ... ... ... \n",
"10569 Leasehold 407 None \n",
"10570 Leasehold 113 None \n",
"10571 Leasehold 337 None \n",
"10572 Leasehold 230 None \n",
"10573 Leasehold 84 None \n",
"\n", "\n",
" last_seen decision \n", " last_seen decision \n",
"0 0 None \n", "0 0 None \n",
"1 0 None \n", "1 0 None \n",
"2 0 None \n", "2 0 None \n",
"3 20 None \n", "3 0 None \n",
"4 0 None \n", "4 0 None \n",
"... ... ... \n", "... ... ... \n",
"18503 0 None \n", "10569 0 None \n",
"18504 20 None \n", "10570 0 None \n",
"18505 0 None \n", "10571 0 None \n",
"18506 0 None \n", "10572 9 None \n",
"18507 20 None \n", "10573 0 None \n",
"\n", "\n",
"[18508 rows x 16 columns]" "[10574 rows x 16 columns]"
] ]
}, },
"execution_count": 8, "execution_count": 8,
@ -534,7 +536,7 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"(18508, 16)" "(10574, 16)"
] ]
}, },
"execution_count": 9, "execution_count": 9,
@ -600,7 +602,7 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"(17217, 18)" "(9494, 18)"
] ]
}, },
"execution_count": 12, "execution_count": 12,
@ -650,12 +652,12 @@
"3 None\n", "3 None\n",
"4 None\n", "4 None\n",
" ... \n", " ... \n",
"18503 None\n", "10569 None\n",
"18504 None\n", "10570 None\n",
"18505 None\n", "10571 None\n",
"18506 None\n", "10572 None\n",
"18507 None\n", "10573 None\n",
"Name: status, Length: 17217, dtype: object" "Name: status, Length: 9494, dtype: object"
] ]
}, },
"execution_count": 13, "execution_count": 13,
@ -676,7 +678,7 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"(10396, 17)" "(6578, 17)"
] ]
}, },
"execution_count": 14, "execution_count": 14,
@ -742,26 +744,6 @@
" <tbody>\n", " <tbody>\n",
" <tr>\n", " <tr>\n",
" <th>0</th>\n", " <th>0</th>\n",
" <td>101369066</td>\n",
" <td>-1.0</td>\n",
" <td>875000.0</td>\n",
" <td>NaN</td>\n",
" <td>https://www.rightmove.co.uk/properties/101369066</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" <td>-1.00</td>\n",
" <td>False</td>\n",
" <td>Share of Freehold</td>\n",
" <td>12</td>\n",
" <td>0</td>\n",
" <td>None</td>\n",
" <td>38</td>\n",
" <td>142</td>\n",
" <td>{'WALK': 797, 'TRANSIT': 1227}</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>105484772</td>\n", " <td>105484772</td>\n",
" <td>45.7</td>\n", " <td>45.7</td>\n",
" <td>325000.0</td>\n", " <td>325000.0</td>\n",
@ -772,7 +754,7 @@
" <td>641.53</td>\n", " <td>641.53</td>\n",
" <td>False</td>\n", " <td>False</td>\n",
" <td>Leasehold</td>\n", " <td>Leasehold</td>\n",
" <td>36</td>\n", " <td>116</td>\n",
" <td>0</td>\n", " <td>0</td>\n",
" <td>None</td>\n", " <td>None</td>\n",
" <td>33</td>\n", " <td>33</td>\n",
@ -780,30 +762,50 @@
" <td>{'WALK': 609, 'TRANSIT': 1109}</td>\n", " <td>{'WALK': 609, 'TRANSIT': 1109}</td>\n",
" <td>2</td>\n", " <td>2</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>108102476</td>\n",
" <td>53.7</td>\n",
" <td>515000.0</td>\n",
" <td>9590.316574</td>\n",
" <td>https://www.rightmove.co.uk/properties/108102476</td>\n",
" <td>1</td>\n",
" <td>104.0</td>\n",
" <td>-1.00</td>\n",
" <td>False</td>\n",
" <td>Leasehold</td>\n",
" <td>97</td>\n",
" <td>0</td>\n",
" <td>None</td>\n",
" <td>21</td>\n",
" <td>593</td>\n",
" <td>{'WALK': 819, 'TRANSIT': 445}</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
"</div>" "</div>"
], ],
"text/plain": [ "text/plain": [
" identifier sqm_ocr price price_per_sqm \\\n", " identifier sqm_ocr price price_per_sqm \\\n",
"0 101369066 -1.0 875000.0 NaN \n", "0 105484772 45.7 325000.0 7111.597374 \n",
"1 105484772 45.7 325000.0 7111.597374 \n", "2 108102476 53.7 515000.0 9590.316574 \n",
"\n", "\n",
" url bedrooms lease_left \\\n", " url bedrooms lease_left \\\n",
"0 https://www.rightmove.co.uk/properties/101369066 3 0.0 \n", "0 https://www.rightmove.co.uk/properties/105484772 1 104.0 \n",
"1 https://www.rightmove.co.uk/properties/105484772 1 104.0 \n", "2 https://www.rightmove.co.uk/properties/108102476 1 104.0 \n",
"\n", "\n",
" service_charge development tenure_type updated_days last_seen \\\n", " service_charge development tenure_type updated_days last_seen decision \\\n",
"0 -1.00 False Share of Freehold 12 0 \n", "0 641.53 False Leasehold 116 0 None \n",
"1 641.53 False Leasehold 36 0 \n", "2 -1.00 False Leasehold 97 0 None \n",
"\n", "\n",
" decision duration initial_walk_duration duration_per_transit \\\n", " duration initial_walk_duration duration_per_transit \\\n",
"0 None 38 142 {'WALK': 797, 'TRANSIT': 1227} \n", "0 33 372 {'WALK': 609, 'TRANSIT': 1109} \n",
"1 None 33 372 {'WALK': 609, 'TRANSIT': 1109} \n", "2 21 593 {'WALK': 819, 'TRANSIT': 445} \n",
"\n", "\n",
" number_of_transit_stops \n", " number_of_transit_stops \n",
"0 2 \n", "0 2 \n",
"1 2 " "2 1 "
] ]
}, },
"execution_count": 15, "execution_count": 15,

12
crawler/logger.py Normal file
View file

@ -0,0 +1,12 @@
import logging
def createLogger(name):
    """Return the logger called ``name``, configuring root logging on first use.

    The first call (while the root logger has no handlers) installs two
    handlers on the root logger via ``logging.basicConfig``: a file handler
    writing to ``app.log`` in the current working directory and a stream
    handler using ``logging.StreamHandler``'s default stream. The root level
    is set to INFO and records are formatted as
    ``time - logger name - level - message``.

    Later calls only look up and return the named logger.

    Args:
        name: Name of the logger to return (passed to ``logging.getLogger``).

    Returns:
        The ``logging.Logger`` instance registered under ``name``.
    """
    root_logger = logging.getLogger()

    # basicConfig() ignores its arguments once the root logger has handlers,
    # but the handler objects would still be constructed by the caller. Build
    # them only when they will actually be installed, otherwise every call
    # opens 'app.log' again and drops the handler without closing it.
    if not root_logger.handlers:
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler('app.log'),
                logging.StreamHandler(),
            ],
        )

    return logging.getLogger(name)