diff --git a/crawler/exploration.ipynb b/crawler/exploration.ipynb
index 5972562..9246ebe 100644
--- a/crawler/exploration.ipynb
+++ b/crawler/exploration.ipynb
@@ -31,95 +31,59 @@
{
"cell_type": "code",
"execution_count": 2,
- "id": "db55b615-698c-4f5d-881a-ea1d3b6d6205",
+ "id": "424501ab-ecc6-42f5-b87e-b0d2871bdc74",
"metadata": {},
"outputs": [
{
- "name": "stdout",
+ "name": "stderr",
"output_type": "stream",
"text": [
- "(93, 2)\n"
+ "/var/folders/wl/kx43lvyn6yv7lq988gwrkq_m0000gn/T/ipykernel_85865/3290399543.py:3: FutureWarning: The behavior of 'to_datetime' with 'unit' when parsing strings is deprecated. In a future version, strings will be parsed as datetime strings, matching the behavior without a 'unit'. To retain the old behavior, explicitly cast ints or floats to numeric type before calling to_datetime.\n",
+ " decisions = pd.read_json(decisions_path)\n",
+ "/var/folders/wl/kx43lvyn6yv7lq988gwrkq_m0000gn/T/ipykernel_85865/3290399543.py:3: FutureWarning: The behavior of 'to_datetime' with 'unit' when parsing strings is deprecated. In a future version, strings will be parsed as datetime strings, matching the behavior without a 'unit'. To retain the old behavior, explicitly cast ints or floats to numeric type before calling to_datetime.\n",
+ " decisions = pd.read_json(decisions_path)\n"
]
- },
- {
- "data": {
- "text/html": [
- "
\n",
" \n",
" | 0 | \n",
- " 100506851 | \n",
- " 58.4 | \n",
- " 525000.0 | \n",
- " 8989.726027 | \n",
- " https://www.rightmove.co.uk/properties/100506851 | \n",
- " 2 | \n",
- " {'duration': 1948, 'distance': 10927, 'duratio... | \n",
- " {'duration': 1948, 'distance': 10927, 'duratio... | \n",
+ " 101369066 | \n",
+ " NaN | \n",
+ " 875000.0 | \n",
+ " NaN | \n",
+ " https://www.rightmove.co.uk/properties/101369066 | \n",
+ " 3 | \n",
+ " {'duration': 2252, 'distance': 7140, 'duration... | \n",
+ " {'duration': 2465, 'distance': 7502, 'duration... | \n",
+ " 0.0 | \n",
" NaN | \n",
" False | \n",
+ " Share of Freehold | \n",
+ " 12 | \n",
+ " None | \n",
+ " 0 | \n",
" None | \n",
"
\n",
" \n",
" | 1 | \n",
- " 100938761 | \n",
- " NaN | \n",
- " 390000.0 | \n",
- " NaN | \n",
- " https://www.rightmove.co.uk/properties/100938761 | \n",
+ " 105484772 | \n",
+ " 45.7 | \n",
+ " 325000.0 | \n",
+ " 7111.597374 | \n",
+ " https://www.rightmove.co.uk/properties/105484772 | \n",
" 1 | \n",
- " None | \n",
- " None | \n",
- " 996.0 | \n",
+ " {'duration': 1983, 'distance': 10095, 'duratio... | \n",
+ " {'duration': 2043, 'distance': 10083, 'duratio... | \n",
+ " 104.0 | \n",
+ " 641.53 | \n",
" False | \n",
+ " Leasehold | \n",
+ " 36 | \n",
+ " None | \n",
+ " 0 | \n",
" None | \n",
"
\n",
" \n",
" | 2 | \n",
- " 101817179 | \n",
- " 53.2 | \n",
- " 495000.0 | \n",
- " 9304.511278 | \n",
- " https://www.rightmove.co.uk/properties/101817179 | \n",
+ " 105827126 | \n",
+ " 58.5 | \n",
+ " 950000.0 | \n",
+ " 16239.316239 | \n",
+ " https://www.rightmove.co.uk/properties/105827126 | \n",
" 1 | \n",
- " {'duration': 2702, 'distance': 8637, 'duration... | \n",
- " {'duration': 3333, 'distance': 10013, 'duratio... | \n",
- " 91.0 | \n",
- " False | \n",
+ " {'duration': 2478, 'distance': 9584, 'duration... | \n",
+ " {'duration': 2478, 'distance': 9584, 'duration... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " True | \n",
+ " Leasehold | \n",
+ " 2 | \n",
+ " None | \n",
+ " 0 | \n",
" None | \n",
"
\n",
" \n",
" | 3 | \n",
- " 101939660 | \n",
- " 56.5 | \n",
- " 238000.0 | \n",
- " 4212.389381 | \n",
- " https://www.rightmove.co.uk/properties/101939660 | \n",
- " 2 | \n",
- " {'duration': 2262, 'distance': 13512, 'duratio... | \n",
- " {'duration': 2322, 'distance': 13491, 'duratio... | \n",
- " 0.0 | \n",
+ " 105836849 | \n",
+ " NaN | \n",
+ " 400000.0 | \n",
+ " NaN | \n",
+ " https://www.rightmove.co.uk/properties/105836849 | \n",
+ " 3 | \n",
+ " {'duration': 2565, 'distance': 14070, 'duratio... | \n",
+ " {'duration': 2565, 'distance': 14070, 'duratio... | \n",
+ " NaN | \n",
+ " NaN | \n",
" False | \n",
+ " Leasehold | \n",
+ " 393 | \n",
+ " None | \n",
+ " 20 | \n",
" None | \n",
"
\n",
" \n",
" | 4 | \n",
- " 102103157 | \n",
- " NaN | \n",
- " 425000.0 | \n",
- " NaN | \n",
- " https://www.rightmove.co.uk/properties/102103157 | \n",
+ " 108102476 | \n",
+ " 53.7 | \n",
+ " 515000.0 | \n",
+ " 9590.316574 | \n",
+ " https://www.rightmove.co.uk/properties/108102476 | \n",
" 1 | \n",
- " None | \n",
- " None | \n",
+ " {'duration': 1266, 'distance': 4042, 'duration... | \n",
+ " {'duration': 1861, 'distance': 4548, 'duration... | \n",
+ " 104.0 | \n",
" NaN | \n",
" False | \n",
+ " Leasehold | \n",
+ " 16 | \n",
+ " None | \n",
+ " 0 | \n",
" None | \n",
"
\n",
" \n",
@@ -317,152 +326,195 @@
" | ... | \n",
" ... | \n",
" ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
"
\n",
" \n",
- " | 34537 | \n",
- " 97023443 | \n",
- " 8.3 | \n",
- " 699999.0 | \n",
- " 84337.228916 | \n",
- " https://www.rightmove.co.uk/properties/97023443 | \n",
+ " 18503 | \n",
+ " 94206080 | \n",
+ " 49.6 | \n",
+ " 899000.0 | \n",
+ " 18125.000000 | \n",
+ " https://www.rightmove.co.uk/properties/94206080 | \n",
" 1 | \n",
- " {'duration': 1704, 'distance': 8729, 'duration... | \n",
- " {'duration': 1713, 'distance': 6755, 'duration... | \n",
- " 993.0 | \n",
+ " {'duration': 1125, 'distance': 4637, 'duration... | \n",
+ " {'duration': 1125, 'distance': 4641, 'duration... | \n",
+ " NaN | \n",
+ " NaN | \n",
" True | \n",
+ " Leasehold | \n",
+ " 256 | \n",
+ " None | \n",
+ " 0 | \n",
" None | \n",
"
\n",
" \n",
- " | 34538 | \n",
- " 97124237 | \n",
- " 53.4 | \n",
- " 300000.0 | \n",
- " 5617.977528 | \n",
- " https://www.rightmove.co.uk/properties/97124237 | \n",
+ " 18504 | \n",
+ " 94206329 | \n",
+ " NaN | \n",
+ " 700000.0 | \n",
+ " NaN | \n",
+ " https://www.rightmove.co.uk/properties/94206329 | \n",
" 1 | \n",
- " None | \n",
- " None | \n",
+ " {'duration': 2172, 'distance': 12497, 'duratio... | \n",
+ " {'duration': 2112, 'distance': 12497, 'duratio... | \n",
+ " NaN | \n",
" NaN | \n",
" False | \n",
+ " Leasehold | \n",
+ " 256 | \n",
+ " None | \n",
+ " 20 | \n",
" None | \n",
"
\n",
" \n",
- " | 34539 | \n",
- " 97335680 | \n",
- " 48.0 | \n",
- " 315000.0 | \n",
- " 6562.500000 | \n",
- " https://www.rightmove.co.uk/properties/97335680 | \n",
+ " 18505 | \n",
+ " 94508306 | \n",
+ " 94.0 | \n",
+ " 1000000.0 | \n",
+ " 10638.297872 | \n",
+ " https://www.rightmove.co.uk/properties/94508306 | \n",
" 2 | \n",
- " None | \n",
- " None | \n",
+ " {'duration': 1046, 'distance': 2193, 'duration... | \n",
+ " {'duration': 1046, 'distance': 2193, 'duration... | \n",
+ " 977.0 | \n",
" NaN | \n",
" False | \n",
+ " Leasehold | \n",
+ " 149 | \n",
+ " None | \n",
+ " 0 | \n",
" None | \n",
"
\n",
" \n",
- " | 34540 | \n",
- " 97522346 | \n",
+ " 18506 | \n",
+ " 95975483 | \n",
" NaN | \n",
- " 400000.0 | \n",
+ " 800000.0 | \n",
" NaN | \n",
- " https://www.rightmove.co.uk/properties/97522346 | \n",
+ " https://www.rightmove.co.uk/properties/95975483 | \n",
" 2 | \n",
- " None | \n",
- " None | \n",
- " NaN | \n",
+ " {'duration': 2281, 'distance': 7262, 'duration... | \n",
+ " {'duration': 2815, 'distance': 5607, 'duration... | \n",
+ " 999.0 | \n",
+ " 0.00 | \n",
" False | \n",
+ " Leasehold | \n",
+ " 3 | \n",
+ " None | \n",
+ " 0 | \n",
" None | \n",
"
\n",
" \n",
- " | 34541 | \n",
- " 98352914 | \n",
- " NaN | \n",
- " 399950.0 | \n",
- " NaN | \n",
- " https://www.rightmove.co.uk/properties/98352914 | \n",
+ " 18507 | \n",
+ " 96773996 | \n",
+ " 70.8 | \n",
+ " 1000000.0 | \n",
+ " 14124.293785 | \n",
+ " https://www.rightmove.co.uk/properties/96773996 | \n",
" 2 | \n",
- " None | \n",
- " None | \n",
- " 125.0 | \n",
+ " {'duration': 1608, 'distance': 8301, 'duration... | \n",
+ " {'duration': 1608, 'distance': 8301, 'duration... | \n",
+ " 992.0 | \n",
+ " 4716.36 | \n",
" True | \n",
+ " Leasehold | \n",
+ " 227 | \n",
+ " None | \n",
+ " 20 | \n",
" None | \n",
"
\n",
" \n",
"\n",
- "