ruff format
This commit is contained in:
parent
37e3e8ad6f
commit
d777558b34
17 changed files with 411 additions and 368 deletions
|
|
@ -5,202 +5,213 @@ from typing import List, Dict
|
|||
from rec import floorplan, routing
|
||||
import re
|
||||
|
||||
# Root folder holding one sub-folder per listing, named after its identifier.
_DATA_DIR = pathlib.Path("data/rs/")


@dataclass()
class Listing:
    """A stored property listing, addressed by its numeric identifier.

    All data for a listing lives in ``_DATA_DIR/<identifier>/`` as JSON files
    (listing, detail, routing, floorplan results) plus ``pics`` and
    ``floorplans`` sub-folders. The ``path_*`` helpers build those paths and
    the properties read values out of the JSON files.
    """

    identifier: int
    # Parsed contents of detail.json, loaded on first ``detailobject`` access.
    _cached: Dict = None

    @staticmethod
    def get_all_listings() -> List["Listing"]:
        """Return a Listing for every ``*/listing.json`` under the data dir.

        Files are visited in sorted path order; the identifier is taken from
        the ``identifier`` key of each JSON file.
        """
        listings = []
        for listing_path in sorted(_DATA_DIR.glob("*/listing.json")):
            with open(listing_path) as f:
                d = json.load(f)
            listings.append(Listing(d["identifier"]))
        return listings

    def path_listing(self) -> pathlib.Path:
        """Return this listing's folder, creating it if it does not exist."""
        p = _DATA_DIR / str(self.identifier)
        p.mkdir(parents=True, exist_ok=True)
        return p

    def path_listing_json(self) -> pathlib.Path:
        """Return the path of ``listing.json`` inside the listing folder."""
        return self.path_listing() / "listing.json"

    def path_detail_json(self) -> pathlib.Path:
        """Return the path of ``detail.json`` inside the listing folder."""
        return self.path_listing() / "detail.json"

    def path_routing_json(self) -> pathlib.Path:
        """Return the path of ``routing.json`` inside the listing folder."""
        return self.path_listing() / "routing.json"

    def path_floorplan_model_json(self) -> pathlib.Path:
        """Return the path of ``floorplan_model.json`` in the listing folder."""
        return self.path_listing() / "floorplan_model.json"

    def path_floorplan_ocr_json(self) -> pathlib.Path:
        """Return the path of ``floorplan_ocr.json`` in the listing folder."""
        return self.path_listing() / "floorplan_ocr.json"

    def path_pic_folder(self) -> pathlib.Path:
        """Return the ``pics`` folder path (not created here)."""
        return self.path_listing() / "pics"

    def path_pic_file(self, order, name) -> pathlib.Path:
        """Return ``pics/<order>_<name>``, creating the ``pics`` folder."""
        folder = self.path_pic_folder()
        folder.mkdir(parents=True, exist_ok=True)
        return folder / f"{order}_{name}"

    def path_floorplan_folder(self) -> pathlib.Path:
        """Return the ``floorplans`` folder path (not created here)."""
        return self.path_listing() / "floorplans"

    def path_floorplan_file(self, order, name) -> pathlib.Path:
        """Return ``floorplans/<order>_<name>``, creating the folder."""
        folder = self.path_floorplan_folder()
        folder.mkdir(parents=True, exist_ok=True)
        return folder / f"{order}_{name}"

    def list_floorplans(self):
        """Return every entry found directly inside the floorplans folder."""
        images = list(self.path_floorplan_folder().glob("*"))
        # todo add check if return is image
        return images

    @staticmethod
    def _max_estimated_sqm(objs):
        """Return the largest non-None ``estimated_sqm`` in *objs*, or None."""
        sqms = [o["estimated_sqm"] for o in objs if o["estimated_sqm"] is not None]
        if len(sqms) == 0:
            return None
        return max(sqms)

    def calculate_sqm_model(self):
        """Run the floorplan model on every floorplan and store the results.

        Writes one record per floorplan to ``floorplan_model.json``.
        """
        objs = []
        for floorplan_path in self.list_floorplans():
            estimated_sqm, model_output, predictions = floorplan.calculate_model(
                floorplan_path
            )
            objs.append(
                {
                    "floorplan_path": str(floorplan_path),
                    "estimated_sqm": estimated_sqm,
                    "model_output": model_output,
                    # cant serialize the predictions itself since its a tensor
                    "no_predictions": len(predictions),
                }
            )

        with open(self.path_floorplan_model_json(), "w") as f:
            json.dump(objs, f)

    @property
    def sqm_model(self, recalculate=True):
        """Largest model-estimated area over all floorplans, or None.

        NOTE(review): this is a property, so ``recalculate`` can never be
        supplied by attribute access and the default applies -- the model is
        re-run on every read. Confirm whether that is intended.
        """
        if not self.path_floorplan_model_json().exists() or recalculate:
            self.calculate_sqm_model()

        # Read back the file that calculate_sqm_model() writes.
        with open(self.path_floorplan_model_json()) as f:
            objs = json.load(f)

        # Skip floorplans without an estimate; None when nothing is left.
        return self._max_estimated_sqm(objs)

    def calculate_sqm_ocr(self, recalculate=True):
        """Run OCR on every floorplan and store results in ``floorplan_ocr.json``.

        Does nothing when ``recalculate`` is false and the file already exists.
        """
        if not recalculate and self.path_floorplan_ocr_json().exists():
            return

        objs = []
        for floorplan_path in self.list_floorplans():
            estimated_sqm, model_output = floorplan.calculate_ocr(floorplan_path)
            objs.append(
                {
                    "floorplan_path": str(floorplan_path),
                    "estimated_sqm": estimated_sqm,
                    "text": model_output,
                }
            )

        with open(self.path_floorplan_ocr_json(), "w") as f:
            json.dump(objs, f)

    @property
    def sqm_ocr(self, recalculate=False):
        """Largest OCR-estimated area over all floorplans, or None.

        The OCR results are computed only when ``floorplan_ocr.json`` is
        missing (``recalculate`` cannot be passed through property access).
        """
        if not self.path_floorplan_ocr_json().exists() or recalculate:
            self.calculate_sqm_ocr()

        with open(self.path_floorplan_ocr_json()) as f:
            objs = json.load(f)

        return self._max_estimated_sqm(objs)

    def calculate_route(self, dest_lat: float, dest_lon: float, recalculate=False):
        """Fetch a transit route to the destination and store it in ``routing.json``.

        Skipped when the file already exists, unless ``recalculate`` is true.
        """
        if self.path_routing_json().exists() and not recalculate:
            return

        result = routing.transit_route(
            self.latitude, self.longitude, dest_lat, dest_lon
        )
        with open(self.path_routing_json(), "w") as f:
            json.dump(result, f)

    @property
    def travel_time(self) -> List:
        """Times extracted from ``routing.json``; ``[]`` when no route is stored."""
        if not self.path_routing_json().exists():
            return []
        with open(self.path_routing_json()) as f:
            d = json.load(f)

        return routing.extract_time(d)

    @property
    def url(self):
        """Rightmove page URL for this listing."""
        return f"https://www.rightmove.co.uk/properties/{self.identifier}"

    @property
    def detailobject(self):
        """Parsed ``detail.json``, read once and cached on the instance."""
        if self._cached is None:
            with open(self.path_detail_json()) as f:
                self._cached = json.load(f)
        return self._cached

    @property
    def price(self) -> float:
        """Value of ``property.price`` in the detail data."""
        return self.detailobject["property"]["price"]

    @property
    def price_per_sqm(self) -> float:
        """Price divided by the OCR area; None when the area is missing or 0."""
        # Read the property once: every access re-reads the OCR JSON file.
        sqm = self.sqm_ocr
        if sqm is None or sqm == 0:
            return None
        return self.price / sqm

    @property
    def bedrooms(self) -> int:
        """Value of ``property.bedrooms`` in the detail data."""
        return self.detailobject["property"]["bedrooms"]

    @property
    def latitude(self) -> float:
        """Value of ``property.latitude`` in the detail data."""
        return self.detailobject["property"]["latitude"]

    @property
    def longitude(self) -> float:
        """Value of ``property.longitude`` in the detail data."""
        return self.detailobject["property"]["longitude"]

    @property
    def leaseLeft(self) -> float:
        """First number in the ``lengthOfLease`` tenure entry, or None.

        Returns None when there is no tenure info, no ``lengthOfLease`` entry,
        or the first such entry's value contains no number.
        """
        ds = self.detailobject["property"].get("tenureInfo", {}).get("content", [])
        for d in ds:
            if d["type"] == "lengthOfLease":
                match = re.search(r"(\d+\.?\d*)", d["value"])
                if match:
                    return float(match.group(1))
                return None
        return None

    @property
    def development(self) -> bool:
        """Value of ``property.development`` in the detail data (aka new home)."""
        return self.detailobject["property"]["development"]

    def dict_nicely(self):
        """Return a flat summary dict of the listing's key figures.

        ``travel_time_fastest`` / ``travel_time_second`` are None when fewer
        than one / two travel times are available.
        """
        # Read once: the property re-parses routing.json on every access, and
        # it returns [] when no route has been calculated yet.
        times = self.travel_time
        return {
            "identifier": self.identifier,
            "sqm_ocr": self.sqm_ocr,
            "price": self.price,
            "price_per_sqm": self.price_per_sqm,
            "url": self.url,
            "bedrooms": self.bedrooms,
            "travel_time_fastest": None if len(times) < 1 else times[0],
            "travel_time_second": None if len(times) < 2 else times[1],
            "lease_left": self.leaseLeft,
            "development": self.development,
        }
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke check: print the floorplan files of the first stored listing.
    listings = Listing.get_all_listings()
    if listings:
        print(listings[0].list_floorplans())
    else:
        # An empty data directory would otherwise raise IndexError on [0].
        print([])
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue