fix types and format
This commit is contained in:
parent
91d3237516
commit
b873eaf203
8 changed files with 117 additions and 172 deletions
|
|
@ -16,12 +16,13 @@ class QueryParameters:
|
||||||
district_names: set[str]
|
district_names: set[str]
|
||||||
radius: float = 0
|
radius: float = 0
|
||||||
page_size: int = 500 # items per page
|
page_size: int = 500 # items per page
|
||||||
max_days_since_added: int | None = None
|
max_days_since_added: int = 30
|
||||||
|
# available from; furnished/unfurnished; council tax
|
||||||
|
|
||||||
|
|
||||||
async def dump_listings(
|
async def dump_listings(
|
||||||
parameters: QueryParameters,
|
parameters: QueryParameters,
|
||||||
data_dir: pathlib.Path = pathlib.Path("data/rs/"),
|
data_dir: pathlib.Path = pathlib.Path("data/rs/"),
|
||||||
) -> list[Listing]:
|
) -> list[Listing]:
|
||||||
districts = {
|
districts = {
|
||||||
district: locid
|
district: locid
|
||||||
|
|
@ -31,29 +32,28 @@ async def dump_listings(
|
||||||
print("Valid districts to scrape:", districts.keys())
|
print("Valid districts to scrape:", districts.keys())
|
||||||
listings = []
|
listings = []
|
||||||
|
|
||||||
json_responses = await asyncio.gather(
|
json_responses = await asyncio.gather(*[
|
||||||
*[
|
listing_query(
|
||||||
listing_query(
|
page=i,
|
||||||
page=i,
|
channel=parameters.listing_type,
|
||||||
channel=parameters.listing_type,
|
min_bedrooms=parameters.min_bedrooms,
|
||||||
min_bedrooms=parameters.min_bedrooms,
|
max_bedrooms=parameters.max_bedrooms,
|
||||||
max_bedrooms=parameters.max_bedrooms,
|
radius=parameters.radius,
|
||||||
radius=parameters.radius,
|
min_price=parameters.min_price,
|
||||||
min_price=parameters.min_price,
|
max_price=parameters.max_price,
|
||||||
max_price=parameters.max_price,
|
location_id=locid,
|
||||||
location_id=locid,
|
page_size=parameters.page_size,
|
||||||
page_size=parameters.page_size,
|
max_days_since_added=parameters.max_days_since_added,
|
||||||
max_days_since_added=parameters.max_days_since_added,
|
) for locid in districts.values() for i in [1, 2]
|
||||||
) for locid in districts.values() for i in [1, 2]
|
])
|
||||||
]
|
|
||||||
)
|
|
||||||
listings = []
|
listings = []
|
||||||
for response_json in json_responses:
|
for response_json in json_responses:
|
||||||
if response_json["totalAvailableResults"] == 0:
|
if response_json["totalAvailableResults"] == 0:
|
||||||
print("No results found")
|
print("No results found")
|
||||||
continue
|
continue
|
||||||
if response_json["totalAvailableResults"] > 0:
|
if response_json["totalAvailableResults"] > 0:
|
||||||
print("totalAvailableResults: ", response_json["totalAvailableResults"])
|
print("totalAvailableResults: ",
|
||||||
|
response_json["totalAvailableResults"])
|
||||||
for property in response_json["properties"]:
|
for property in response_json["properties"]:
|
||||||
identifier = property["identifier"]
|
identifier = property["identifier"]
|
||||||
|
|
||||||
|
|
@ -62,11 +62,3 @@ async def dump_listings(
|
||||||
listings.append(listing)
|
listings.append(listing)
|
||||||
|
|
||||||
return listings
|
return listings
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
dump_listings()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
import pathlib
|
|
||||||
from rec.query import detail_query
|
from rec.query import detail_query
|
||||||
from tqdm.asyncio import tqdm
|
from tqdm.asyncio import tqdm
|
||||||
|
|
||||||
|
|
@ -13,8 +12,7 @@ semaphore = asyncio.Semaphore(10)
|
||||||
async def dump_detail(listing_paths: list[str]):
|
async def dump_detail(listing_paths: list[str]):
|
||||||
listings = Listing.get_all_listings(listing_paths)
|
listings = Listing.get_all_listings(listing_paths)
|
||||||
filtered_listings = await tqdm.gather(
|
filtered_listings = await tqdm.gather(
|
||||||
*[_dump_detail_for_listing(listing) for listing in listings]
|
*[_dump_detail_for_listing(listing) for listing in listings])
|
||||||
)
|
|
||||||
return filtered_listings
|
return filtered_listings
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -27,12 +25,3 @@ async def _dump_detail_for_listing(listing: Listing):
|
||||||
d = await detail_query(listing.identifier)
|
d = await detail_query(listing.identifier)
|
||||||
with open(listing.path_detail_json(), "w") as f:
|
with open(listing.path_detail_json(), "w") as f:
|
||||||
json.dump(d, f)
|
json.dump(d, f)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
listing_paths = sorted(list(pathlib.Path("data/rs").glob("*/listing.json")))
|
|
||||||
dump_detail(listing_paths)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
import pathlib
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
from tqdm.asyncio import tqdm
|
from tqdm.asyncio import tqdm
|
||||||
from data_access import Listing
|
from data_access import Listing
|
||||||
|
|
@ -11,7 +10,8 @@ semaphore = asyncio.Semaphore(10)
|
||||||
|
|
||||||
async def dump_images(listing_paths: list[str]):
|
async def dump_images(listing_paths: list[str]):
|
||||||
listings = Listing.get_all_listings(listing_paths)
|
listings = Listing.get_all_listings(listing_paths)
|
||||||
await tqdm.gather(*[dump_images_for_listing(listing) for listing in listings])
|
await tqdm.gather(
|
||||||
|
*[dump_images_for_listing(listing) for listing in listings])
|
||||||
|
|
||||||
|
|
||||||
async def dump_images_for_listing(listing: Listing):
|
async def dump_images_for_listing(listing: Listing):
|
||||||
|
|
@ -30,17 +30,9 @@ async def dump_images_for_listing(listing: Listing):
|
||||||
async with semaphore:
|
async with semaphore:
|
||||||
async with session.get(url) as response:
|
async with session.get(url) as response:
|
||||||
if response.status != 200:
|
if response.status != 200:
|
||||||
raise Exception(f"Error for {url}: {response.status}")
|
raise Exception(
|
||||||
|
f"Error for {url}: {response.status}")
|
||||||
with open(p, "wb") as f:
|
with open(p, "wb") as f:
|
||||||
f.write(await response.read())
|
f.write(await response.read())
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
tqdm.write(f"Error for {url}: {e}")
|
tqdm.write(f"Error for {url}: {e}")
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
listing_paths = sorted(list(pathlib.Path("data/rs").glob("*/listing.json")))
|
|
||||||
dump_images(listing_paths)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,4 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
import pathlib
|
|
||||||
from data_access import Listing
|
from data_access import Listing
|
||||||
from tqdm.asyncio import tqdm
|
from tqdm.asyncio import tqdm
|
||||||
import multiprocessing
|
import multiprocessing
|
||||||
|
|
@ -7,25 +6,16 @@ import multiprocessing
|
||||||
|
|
||||||
async def detect_floorplan(listing_paths: list[str]):
|
async def detect_floorplan(listing_paths: list[str]):
|
||||||
listings = Listing.get_all_listings(listing_paths)
|
listings = Listing.get_all_listings(listing_paths)
|
||||||
cpu_count = multiprocessing.cpu_count() / 4
|
cpu_count = multiprocessing.cpu_count() // 4
|
||||||
semaphore = asyncio.Semaphore(cpu_count)
|
semaphore = asyncio.Semaphore(cpu_count)
|
||||||
|
|
||||||
await tqdm.gather(
|
await tqdm.gather(*[
|
||||||
*[_detect_floorplan_with_semaphore(listing, semaphore) for listing in listings]
|
_detect_floorplan_with_semaphore(listing, semaphore)
|
||||||
)
|
for listing in listings
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
async def _detect_floorplan_with_semaphore(
|
async def _detect_floorplan_with_semaphore(listing: Listing,
|
||||||
listing: Listing, semaphore: asyncio.Semaphore
|
semaphore: asyncio.Semaphore):
|
||||||
):
|
|
||||||
async with semaphore:
|
async with semaphore:
|
||||||
return await listing.calculate_sqm_ocr(recalculate=False)
|
return await listing.calculate_sqm_ocr(recalculate=False)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
listing_paths = sorted(list(pathlib.Path("data/rs").glob("*/listing.json")))
|
|
||||||
detect_floorplan(listing_paths)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,3 @@
|
||||||
import pathlib
|
|
||||||
from data_access import Listing
|
from data_access import Listing
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
from geopy.distance import geodesic
|
from geopy.distance import geodesic
|
||||||
|
|
@ -15,30 +14,30 @@ def calculate_route(listing_paths: list[str]):
|
||||||
# reduce listings to everything within 7 miles
|
# reduce listings to everything within 7 miles
|
||||||
filtered_listings = []
|
filtered_listings = []
|
||||||
for listing in listings:
|
for listing in listings:
|
||||||
miles = geodesic(
|
miles = geodesic(BROCK_STREET_LAT_LONG,
|
||||||
BROCK_STREET_LAT_LONG, (listing.latitude, listing.longitude)
|
(listing.latitude, listing.longitude)).miles
|
||||||
).miles
|
|
||||||
if listing.isRemoved:
|
if listing.isRemoved:
|
||||||
log.info(f"Removed-Skip: Skipping {listing.identifier} is already removed.")
|
log.info(f"Removed-Skip: Skipping {listing.identifier} "
|
||||||
|
"is already removed.")
|
||||||
continue
|
continue
|
||||||
if miles > 7:
|
if miles > 7:
|
||||||
log.info(
|
log.info(f"Miles-Skip: Skipping {listing.identifier} as it is "
|
||||||
f"Miles-Skip: Skipping {listing.identifier} as it is {miles} miles away"
|
f"{miles} miles away")
|
||||||
)
|
|
||||||
continue
|
continue
|
||||||
if listing.path_routing_json().exists():
|
if listing.path_routing_json().exists():
|
||||||
log.info(
|
log.info(
|
||||||
f"Path-Skip: Skipping {listing.identifier} as path routing already exists"
|
(f"Path-Skip: Skipping {listing.identifier} as path routing "
|
||||||
)
|
"already exists"))
|
||||||
continue
|
continue
|
||||||
if listing.sqm_ocr is None or listing.sqm_ocr < 30 or listing.sqm_ocr > 200:
|
if (listing.sqm_ocr is None or listing.sqm_ocr < 30
|
||||||
log.info(
|
or listing.sqm_ocr > 200):
|
||||||
f"Floorplan-Skip: Skipping {listing.identifier} as sqm_ocr is {listing.sqm_ocr}"
|
log.info((f"Floorplan-Skip: Skipping {listing.identifier} as "
|
||||||
)
|
f"sqm_ocr is {listing.sqm_ocr}"))
|
||||||
continue
|
continue
|
||||||
filtered_listings.append(listing)
|
filtered_listings.append(listing)
|
||||||
|
|
||||||
print(f"Filtered listings from {len(listings)} to {len(filtered_listings)}")
|
print(
|
||||||
|
f"Filtered listings from {len(listings)} to {len(filtered_listings)}")
|
||||||
|
|
||||||
for listing in tqdm(filtered_listings):
|
for listing in tqdm(filtered_listings):
|
||||||
lat, long = BROCK_STREET_LAT_LONG
|
lat, long = BROCK_STREET_LAT_LONG
|
||||||
|
|
@ -47,12 +46,3 @@ def calculate_route(listing_paths: list[str]):
|
||||||
duration_minutes = traveltime["duration"] / 60.0
|
duration_minutes = traveltime["duration"] / 60.0
|
||||||
|
|
||||||
tqdm.write(f"{listing.identifier} {duration_minutes}")
|
tqdm.write(f"{listing.identifier} {duration_minutes}")
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
listing_paths = sorted(list(pathlib.Path("data/rs").glob("*/listing.json")))
|
|
||||||
calculate_route(listing_paths)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,9 @@ import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
def export_to_csv(
|
def export_to_csv(
|
||||||
listings: list[Listing], output_file: Path, columns: list[str]
|
listings: list[Listing],
|
||||||
|
output_file: Path,
|
||||||
|
columns: list[str],
|
||||||
) -> None:
|
) -> None:
|
||||||
ds = [listing.dict_nicely() for listing in listings]
|
ds = [listing.dict_nicely() for listing in listings]
|
||||||
df = pd.DataFrame(ds)
|
df = pd.DataFrame(ds)
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@ import asyncio
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
import json
|
import json
|
||||||
import pathlib
|
import pathlib
|
||||||
from typing import List, Dict
|
from typing import Any, List, Dict
|
||||||
from rec import floorplan, routing
|
from rec import floorplan, routing
|
||||||
import re
|
import re
|
||||||
import datetime
|
import datetime
|
||||||
|
|
@ -11,7 +11,7 @@ import datetime
|
||||||
@dataclass()
|
@dataclass()
|
||||||
class Listing:
|
class Listing:
|
||||||
identifier: int
|
identifier: int
|
||||||
_cached: Dict = None
|
_cached: Dict | None = None
|
||||||
data_dir: pathlib.Path = pathlib.Path("data/rs/")
|
data_dir: pathlib.Path = pathlib.Path("data/rs/")
|
||||||
ALL_COLUMNS = [
|
ALL_COLUMNS = [
|
||||||
"identifier",
|
"identifier",
|
||||||
|
|
@ -46,10 +46,8 @@ class Listing:
|
||||||
while str(d['identifier']) in str(data_dir.resolve().absolute()):
|
while str(d['identifier']) in str(data_dir.resolve().absolute()):
|
||||||
data_dir = data_dir.parent
|
data_dir = data_dir.parent
|
||||||
listing = Listing(d["identifier"], data_dir=data_dir)
|
listing = Listing(d["identifier"], data_dir=data_dir)
|
||||||
if (
|
if (listing.last_seen is not None
|
||||||
listing.last_seen is not None
|
and listing.last_seen < seen_in_the_last_n_days):
|
||||||
and listing.last_seen < seen_in_the_last_n_days
|
|
||||||
):
|
|
||||||
identifiers.append(listing)
|
identifiers.append(listing)
|
||||||
|
|
||||||
return identifiers
|
return identifiers
|
||||||
|
|
@ -107,18 +105,15 @@ class Listing:
|
||||||
objs = []
|
objs = []
|
||||||
for floorplan_path in self.list_floorplans():
|
for floorplan_path in self.list_floorplans():
|
||||||
estimated_sqm, model_output, predictions = floorplan.calculate_model(
|
estimated_sqm, model_output, predictions = floorplan.calculate_model(
|
||||||
floorplan_path
|
floorplan_path)
|
||||||
)
|
objs.append({
|
||||||
objs.append(
|
"floorplan_path": str(floorplan_path),
|
||||||
{
|
"estimated_sqm": estimated_sqm,
|
||||||
"floorplan_path": str(floorplan_path),
|
"model_output": model_output,
|
||||||
"estimated_sqm": estimated_sqm,
|
"no_predictions": len(
|
||||||
"model_output": model_output,
|
predictions
|
||||||
"no_predictions": len(
|
), # cant serialize the predictions itself since its a tensor
|
||||||
predictions
|
})
|
||||||
), # cant serialize the predictions itself since its a tensor
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
with open(self.path_floorplan_model_json(), "w") as f:
|
with open(self.path_floorplan_model_json(), "w") as f:
|
||||||
json.dump(objs, f)
|
json.dump(objs, f)
|
||||||
|
|
@ -131,9 +126,8 @@ class Listing:
|
||||||
with open(self.path_floorplan_json()) as f:
|
with open(self.path_floorplan_json()) as f:
|
||||||
objs = json.load(f)
|
objs = json.load(f)
|
||||||
|
|
||||||
max_sqm = max(
|
max_sqm = max([o["estimated_sqm"] for o in objs
|
||||||
[o["estimated_sqm"] for o in objs if o is None]
|
if o is None]) # filter out Nones
|
||||||
) # filter out Nones
|
|
||||||
return max_sqm
|
return max_sqm
|
||||||
|
|
||||||
async def calculate_sqm_ocr(self, recalculate=True):
|
async def calculate_sqm_ocr(self, recalculate=True):
|
||||||
|
|
@ -143,15 +137,12 @@ class Listing:
|
||||||
objs = []
|
objs = []
|
||||||
for floorplan_path in self.list_floorplans():
|
for floorplan_path in self.list_floorplans():
|
||||||
estimated_sqm, model_output = await asyncio.to_thread(
|
estimated_sqm, model_output = await asyncio.to_thread(
|
||||||
floorplan.calculate_ocr, floorplan_path
|
floorplan.calculate_ocr, floorplan_path)
|
||||||
)
|
objs.append({
|
||||||
objs.append(
|
"floorplan_path": str(floorplan_path),
|
||||||
{
|
"estimated_sqm": estimated_sqm,
|
||||||
"floorplan_path": str(floorplan_path),
|
"text": model_output,
|
||||||
"estimated_sqm": estimated_sqm,
|
})
|
||||||
"text": model_output,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
with open(self.path_floorplan_ocr_json(), "w") as f:
|
with open(self.path_floorplan_ocr_json(), "w") as f:
|
||||||
json.dump(objs, f)
|
json.dump(objs, f)
|
||||||
|
|
@ -164,19 +155,23 @@ class Listing:
|
||||||
with open(self.path_floorplan_ocr_json()) as f:
|
with open(self.path_floorplan_ocr_json()) as f:
|
||||||
objs = json.load(f)
|
objs = json.load(f)
|
||||||
|
|
||||||
sqms = [o["estimated_sqm"] for o in objs if o["estimated_sqm"] is not None]
|
sqms = [
|
||||||
|
o["estimated_sqm"] for o in objs if o["estimated_sqm"] is not None
|
||||||
|
]
|
||||||
if len(sqms) == 0:
|
if len(sqms) == 0:
|
||||||
return None
|
return None
|
||||||
max_sqm = max(sqms)
|
max_sqm = max(sqms)
|
||||||
return max_sqm
|
return max_sqm
|
||||||
|
|
||||||
def calculate_route(self, dest_lat: float, dest_lon: float, recalculate=False):
|
def calculate_route(self,
|
||||||
|
dest_lat: float,
|
||||||
|
dest_lon: float,
|
||||||
|
recalculate=False):
|
||||||
if self.path_routing_json().exists() and not recalculate:
|
if self.path_routing_json().exists() and not recalculate:
|
||||||
return
|
return
|
||||||
|
|
||||||
result = routing.transit_route(
|
result = routing.transit_route(self.latitude, self.longitude, dest_lat,
|
||||||
self.latitude, self.longitude, dest_lat, dest_lon
|
dest_lon)
|
||||||
)
|
|
||||||
with open(self.path_routing_json(), "w") as f:
|
with open(self.path_routing_json(), "w") as f:
|
||||||
json.dump(result, f)
|
json.dump(result, f)
|
||||||
|
|
||||||
|
|
@ -200,11 +195,11 @@ class Listing:
|
||||||
return json.load(f)
|
return json.load(f)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def detailobject(self):
|
def detailobject(self) -> dict[str, Any]:
|
||||||
if self._cached is None:
|
if self._cached is None:
|
||||||
with open(self.path_detail_json()) as f:
|
with open(self.path_detail_json()) as f:
|
||||||
self._cached = json.load(f)
|
self._cached = json.load(f)
|
||||||
return self._cached
|
return self._cached # type: ignore
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def price(self) -> float:
|
def price(self) -> float:
|
||||||
|
|
@ -217,7 +212,7 @@ class Listing:
|
||||||
@property
|
@property
|
||||||
def price_per_sqm(self) -> float:
|
def price_per_sqm(self) -> float:
|
||||||
if self.sqm_ocr is None or self.sqm_ocr == 0:
|
if self.sqm_ocr is None or self.sqm_ocr == 0:
|
||||||
return None
|
return -1
|
||||||
return self.price / self.sqm_ocr
|
return self.price / self.sqm_ocr
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|
@ -233,8 +228,9 @@ class Listing:
|
||||||
return self.detailobject["property"]["longitude"]
|
return self.detailobject["property"]["longitude"]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def leaseLeft(self) -> int:
|
def leaseLeft(self) -> float | None:
|
||||||
ds = self.detailobject["property"].get("tenureInfo", {}).get("content", [])
|
ds = self.detailobject["property"].get("tenureInfo",
|
||||||
|
{}).get("content", [])
|
||||||
for d in ds:
|
for d in ds:
|
||||||
if d["type"] == "lengthOfLease":
|
if d["type"] == "lengthOfLease":
|
||||||
matches = re.findall(r"(\d+\.?\d*)", d["value"])
|
matches = re.findall(r"(\d+\.?\d*)", d["value"])
|
||||||
|
|
@ -250,7 +246,7 @@ class Listing:
|
||||||
return (now - ds).days
|
return (now - ds).days
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def last_seen(self) -> int:
|
def last_seen(self) -> int | None:
|
||||||
if not self.path_last_seen_listing().exists():
|
if not self.path_last_seen_listing().exists():
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
@ -260,8 +256,9 @@ class Listing:
|
||||||
return (datetime.datetime.now() - dt).days
|
return (datetime.datetime.now() - dt).days
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def serviceCharge(self) -> float:
|
def serviceCharge(self) -> float | None:
|
||||||
ds = self.detailobject["property"].get("tenureInfo", {}).get("content", [])
|
ds = self.detailobject["property"].get("tenureInfo",
|
||||||
|
{}).get("content", [])
|
||||||
for d in ds:
|
for d in ds:
|
||||||
if d["type"] == "annualServiceCharge":
|
if d["type"] == "annualServiceCharge":
|
||||||
matches = re.findall(r"([\d,.]+)", d["value"])
|
matches = re.findall(r"([\d,.]+)", d["value"])
|
||||||
|
|
@ -276,8 +273,7 @@ class Listing:
|
||||||
# aka new home
|
# aka new home
|
||||||
try:
|
try:
|
||||||
return self.detailobject["property"]["development"]
|
return self.detailobject["property"]["development"]
|
||||||
except:
|
except Exception:
|
||||||
print(self.identifier)
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|
@ -294,39 +290,33 @@ class Listing:
|
||||||
def dict_nicely(self):
|
def dict_nicely(self):
|
||||||
return {
|
return {
|
||||||
"identifier":
|
"identifier":
|
||||||
self.identifier,
|
self.identifier,
|
||||||
"sqm_ocr":
|
"sqm_ocr":
|
||||||
self.sqm_ocr,
|
self.sqm_ocr,
|
||||||
"price":
|
"price":
|
||||||
self.price,
|
self.price,
|
||||||
"price_per_sqm":
|
"price_per_sqm":
|
||||||
self.price_per_sqm,
|
self.price_per_sqm,
|
||||||
"url":
|
"url":
|
||||||
self.url,
|
self.url,
|
||||||
"bedrooms":
|
"bedrooms":
|
||||||
self.bedrooms,
|
self.bedrooms,
|
||||||
"travel_time_fastest":
|
"travel_time_fastest":
|
||||||
None if len(self.travel_time) == 0 else self.travel_time[0],
|
None if len(self.travel_time) == 0 else self.travel_time[0],
|
||||||
"travel_time_second":
|
"travel_time_second":
|
||||||
None if len(self.travel_time) < 2 else self.travel_time[1],
|
None if len(self.travel_time) < 2 else self.travel_time[1],
|
||||||
"lease_left":
|
"lease_left":
|
||||||
self.leaseLeft,
|
self.leaseLeft,
|
||||||
"service_charge":
|
"service_charge":
|
||||||
self.serviceCharge,
|
self.serviceCharge,
|
||||||
"development":
|
"development":
|
||||||
self.development,
|
self.development,
|
||||||
"tenure_type":
|
"tenure_type":
|
||||||
self.tenure_type,
|
self.tenure_type,
|
||||||
"updated_days":
|
"updated_days":
|
||||||
self.updateDaysAgo,
|
self.updateDaysAgo,
|
||||||
"status":
|
"status":
|
||||||
self.status,
|
self.status,
|
||||||
"last_seen":
|
"last_seen":
|
||||||
self.last_seen,
|
self.last_seen,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
listing_paths = sorted(list(pathlib.Path("data/rs").glob("*/listing.json")))
|
|
||||||
listings = Listing.get_all_listings()
|
|
||||||
print(listings[0].list_floorplans())
|
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,11 @@
|
||||||
# from diskcache import Cache
|
# from diskcache import Cache
|
||||||
import enum
|
import enum
|
||||||
from typing import List
|
from typing import Any, List
|
||||||
import aiohttp
|
import aiohttp
|
||||||
import requests
|
import requests
|
||||||
import urllib3
|
import urllib3
|
||||||
|
|
||||||
urllib3.disable_warnings()
|
urllib3.disable_warnings() # type: ignore
|
||||||
|
|
||||||
|
|
||||||
class ListingType(enum.StrEnum):
|
class ListingType(enum.StrEnum):
|
||||||
|
|
@ -38,12 +38,12 @@ async def detail_query(detail_id: int):
|
||||||
}
|
}
|
||||||
url = f"https://api.rightmove.co.uk/api/property/{detail_id}"
|
url = f"https://api.rightmove.co.uk/api/property/{detail_id}"
|
||||||
async with aiohttp.ClientSession() as session:
|
async with aiohttp.ClientSession() as session:
|
||||||
async with session.get(url, params=params, headers=headers) as response:
|
async with session.get(url, params=params,
|
||||||
|
headers=headers) as response:
|
||||||
if response.status != 200:
|
if response.status != 200:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
f"""id: {detail_id}. Status Code: {response.status}."""
|
f"""id: {detail_id}. Status Code: {response.status}."""
|
||||||
f"""Failed due to: {await response.text()}"""
|
f"""Failed due to: {await response.text()}""")
|
||||||
)
|
|
||||||
return await response.json()
|
return await response.json()
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -57,11 +57,11 @@ async def listing_query(
|
||||||
max_price: int,
|
max_price: int,
|
||||||
location_id: str = "STATION^5168", # kings cross station
|
location_id: str = "STATION^5168", # kings cross station
|
||||||
mustNewHome: bool = False,
|
mustNewHome: bool = False,
|
||||||
max_days_since_added: int = None,
|
max_days_since_added: int = 30,
|
||||||
property_type: List["PropertyType"] = [],
|
property_type: List["PropertyType"] = [],
|
||||||
page_size=25,
|
page_size: int = 25,
|
||||||
) -> dict:
|
) -> dict[str, Any]:
|
||||||
params = {
|
params: dict[str, str] = {
|
||||||
"locationIdentifier": location_id,
|
"locationIdentifier": location_id,
|
||||||
"channel": channel.upper(),
|
"channel": channel.upper(),
|
||||||
"page": str(page),
|
"page": str(page),
|
||||||
|
|
@ -77,14 +77,14 @@ async def listing_query(
|
||||||
"appVersion": "4.28.0",
|
"appVersion": "4.28.0",
|
||||||
}
|
}
|
||||||
if channel is ListingType.BUY:
|
if channel is ListingType.BUY:
|
||||||
params["dontShow"] = "sharedOwnership,retirement",
|
params["dontShow"] = "sharedOwnership,retirement"
|
||||||
if len(property_type) > 0:
|
if len(property_type) > 0:
|
||||||
params["propertyTypes"] = ",".join(property_type)
|
params["propertyTypes"] = ",".join(property_type)
|
||||||
if max_days_since_added is not None and max_days_since_added not in [
|
if max_days_since_added is not None and max_days_since_added not in [
|
||||||
1, 3, 7, 14
|
1, 3, 7, 14
|
||||||
]:
|
]:
|
||||||
raise Exception("Invalid max days. Can only be", [1, 3, 7, 14])
|
raise Exception("Invalid max days. Can only be", [1, 3, 7, 14])
|
||||||
params["maxDaysSinceAdded"] = max_days_since_added
|
params["maxDaysSinceAdded"] = str(max_days_since_added)
|
||||||
|
|
||||||
if mustNewHome:
|
if mustNewHome:
|
||||||
params["mustHave"] = "newHome"
|
params["mustHave"] = "newHome"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue