fixing bugs, adding properties for querying and analysis
This commit is contained in:
parent
d108bf11ee
commit
de2639f9c3
6 changed files with 2848 additions and 6 deletions
6
crawler/.ipynb_checkpoints/exploration-checkpoint.ipynb
Normal file
6
crawler/.ipynb_checkpoints/exploration-checkpoint.ipynb
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
{
|
||||
"cells": [],
|
||||
"metadata": {},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
25
crawler/.ipynb_checkpoints/pyproject-checkpoint.toml
Normal file
25
crawler/.ipynb_checkpoints/pyproject-checkpoint.toml
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
[tool.poetry]
|
||||
name = "rec"
|
||||
version = "0.1.0"
|
||||
description = ""
|
||||
authors = ["Kadir Tugan <git@k8n.dev>"]
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = ">3.11"
|
||||
SQLAlchemy = "^2.0.23"
|
||||
requests = "^2.31.0"
|
||||
cachetools = "^5.3.2"
|
||||
diskcache = "^5.6.3"
|
||||
tqdm = "^4.66.2"
|
||||
pillow = "^10.2.0"
|
||||
torch = "^2.2.1"
|
||||
numpy = "^1.26.4"
|
||||
transformers = "^4.38.2"
|
||||
pytesseract = "^0.3.10"
|
||||
jupyterlab = "^4.1.4"
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core>=1.0.0"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
from dataclasses import dataclass
|
||||
import json
|
||||
import pathlib
|
||||
from typing import List
|
||||
from typing import List, Dict
|
||||
from rec import floorplan
|
||||
|
||||
_DATA_DIR = pathlib.Path('data/rs/')
|
||||
|
|
@ -9,6 +9,7 @@ _DATA_DIR = pathlib.Path('data/rs/')
|
|||
@dataclass()
|
||||
class Listing():
|
||||
identifier: int
|
||||
_cached: Dict = None
|
||||
|
||||
@staticmethod
|
||||
def get_all_listings() -> List['Listing']:
|
||||
|
|
@ -70,7 +71,7 @@ class Listing():
|
|||
|
||||
@property
|
||||
def sqm_model(self, recalculate=True):
|
||||
if recalculate and not self.path_floorplan_model_json().exists():
|
||||
if not self.path_floorplan_model_json().exists() or recalculate:
|
||||
self.calculate_sqm_model()
|
||||
|
||||
with open(self.path_floorplan_json()) as f:
|
||||
|
|
@ -93,16 +94,49 @@ class Listing():
|
|||
json.dump(objs, f)
|
||||
|
||||
@property
def sqm_ocr(self, recalculate=False):
    """Return the largest OCR-estimated floor area for this listing.

    The estimates are read from the JSON file at
    ``self.path_floorplan_ocr_json()``. When that file does not exist, or
    ``recalculate`` is true, ``self.calculate_sqm_ocr()`` is run first.

    Note that this is a property, so plain attribute access always uses
    the default ``recalculate=False``.

    Returns:
        The maximum ``estimated_sqm`` value found, or ``None`` when no
        entry carries an estimate.
    """
    ocr_path = self.path_floorplan_ocr_json()
    if recalculate or not ocr_path.exists():
        self.calculate_sqm_ocr()

    with open(ocr_path, encoding='utf-8') as f:
        entries = json.load(f)

    # Skip entries that are None themselves as well as entries whose
    # estimate is None; subscripting a None entry would raise TypeError.
    estimates = [
        entry['estimated_sqm']
        for entry in entries
        if entry is not None and entry['estimated_sqm'] is not None
    ]
    # max() raises ValueError on an empty sequence, hence the default
    return max(estimates, default=None)
|
||||
|
||||
@property
def url(self):
    """Return the Rightmove property page URL built from ``self.identifier``."""
    listing_id = self.identifier
    return f'https://www.rightmove.co.uk/properties/{listing_id}'
|
||||
|
||||
@property
def detailobject(self):
    """Return the parsed detail JSON for this listing, loading it at most once.

    On first access the file at ``self.path_detail_json()`` is parsed and
    the result is stored in ``self._cached``; later accesses return that
    stored object without touching the file again.
    """
    if self._cached is None:
        # Pin the encoding so decoding does not depend on the locale default
        with open(self.path_detail_json(), encoding='utf-8') as f:
            self._cached = json.load(f)
    return self._cached
|
||||
|
||||
@property
def price(self) -> float:
    """Return the value stored under ``['property']['price']`` in the detail JSON."""
    property_details = self.detailobject['property']
    return property_details['price']
|
||||
|
||||
@property
|
||||
def price_per_sqm(self) -> float:
|
||||
if self.sqm_ocr is None:
|
||||
return None
|
||||
return self.price / self.sqm_ocr
|
||||
|
||||
def dict_nicely(self):
    """Collect ``sqm_ocr``, ``price``, ``price_per_sqm`` and ``url`` into a dict.

    Keys are the attribute names, inserted in that order.
    """
    summary = {}
    for field in ('sqm_ocr', 'price', 'price_per_sqm', 'url'):
        summary[field] = getattr(self, field)
    return summary
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
1030
crawler/exploration.ipynb
Normal file
1030
crawler/exploration.ipynb
Normal file
File diff suppressed because it is too large
Load diff
1747
crawler/poetry.lock
generated
1747
crawler/poetry.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -16,6 +16,8 @@ torch = "^2.2.1"
|
|||
numpy = "^1.26.4"
|
||||
transformers = "^4.38.2"
|
||||
pytesseract = "^0.3.10"
|
||||
jupyterlab = "^4.1.4"
|
||||
pandas = "^2.2.1"
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue