Compare commits
7 commits
1680cda7b7
...
4b1e971edf
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4b1e971edf | ||
|
|
22aa9c86a7 | ||
|
|
f88bba032f | ||
|
|
dc1186601a | ||
|
|
fe01df0e7e | ||
|
|
4969df8745 | ||
|
|
ccef50b371 |
3 changed files with 13 additions and 113 deletions
32
.drone.yml
32
.drone.yml
|
|
@ -1,62 +1,56 @@
|
|||
kind: pipeline
|
||||
type: kubernetes
|
||||
name: frontend
|
||||
timeout: 30m
|
||||
|
||||
trigger:
|
||||
event:
|
||||
- push
|
||||
- cron
|
||||
branch:
|
||||
- master
|
||||
- master
|
||||
event:
|
||||
- push
|
||||
|
||||
steps:
|
||||
- name: Build frontend image
|
||||
image: plugins/docker
|
||||
settings:
|
||||
username: "viktorbarzin"
|
||||
username: viktorbarzin
|
||||
password:
|
||||
from_secret: dockerhub-token
|
||||
repo: viktorbarzin/immoweb
|
||||
dockerfile: crawler/frontend/Dockerfile
|
||||
context: crawler/frontend
|
||||
cache_from: "viktorbarzin/immoweb:latest"
|
||||
auto_tag: true
|
||||
|
||||
- name: Update deployment
|
||||
image: alpine
|
||||
commands:
|
||||
- "apk add curl"
|
||||
- apk add curl
|
||||
- 'curl -X PATCH https://kubernetes:6443/apis/apps/v1/namespaces/realestate-crawler/deployments/realestate-crawler-ui -H "Authorization:Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" -H "Content-Type:application/strategic-merge-patch+json" -k -d ''{"spec": {"template": {"metadata": { "annotations": {"kubectl.kubernetes.io/restartedAt": "''$(date +%Y-%m-%dT%TZ)''" }}}}}'' | head'
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
type: kubernetes
|
||||
name: api
|
||||
timeout: 30m
|
||||
|
||||
trigger:
|
||||
event:
|
||||
- push
|
||||
- cron
|
||||
branch:
|
||||
- master
|
||||
- master
|
||||
event:
|
||||
- push
|
||||
|
||||
steps:
|
||||
- name: Build backend API image
|
||||
- name: Build API image
|
||||
image: plugins/docker
|
||||
settings:
|
||||
username: "viktorbarzin"
|
||||
username: viktorbarzin
|
||||
password:
|
||||
from_secret: dockerhub-token
|
||||
repo: viktorbarzin/realestatecrawler
|
||||
dockerfile: crawler/Dockerfile
|
||||
context: crawler/
|
||||
cache_from: "viktorbarzin/realestatecrawler:latest"
|
||||
auto_tag: true
|
||||
|
||||
- name: Update deployment
|
||||
image: alpine
|
||||
commands:
|
||||
- "apk add curl"
|
||||
- apk add curl
|
||||
- 'curl -X PATCH https://kubernetes:6443/apis/apps/v1/namespaces/realestate-crawler/deployments/realestate-crawler-api -H "Authorization:Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" -H "Content-Type:application/strategic-merge-patch+json" -k -d ''{"spec": {"template": {"metadata": { "annotations": {"kubectl.kubernetes.io/restartedAt": "''$(date +%Y-%m-%dT%TZ)''" }}}}}'' | head'
|
||||
# Sun Feb 1 08:29:56 PM UTC 2026
|
||||
# Webhook test Sun Feb 1 08:45:52 PM UTC 2026
|
||||
|
|
|
|||
|
|
@ -1,51 +0,0 @@
|
|||
import asyncio
|
||||
from pathlib import Path
|
||||
import aiohttp
|
||||
from repositories import ListingRepository
|
||||
from tenacity import retry, stop_after_attempt, wait_random
|
||||
from tqdm.asyncio import tqdm
|
||||
|
||||
from models import Listing
|
||||
|
||||
# Caps how many floorplan image requests may be in flight at once; the
# download code acquires this semaphore around each HTTP request.
# Setting this too high either crashes rightmove or gets us blocked
semaphore = asyncio.Semaphore(5)
|
||||
|
||||
|
||||
async def dump_images(
    repository: ListingRepository,
    image_base_path: Path = Path("data/rs/"),
):
    """Download floorplan images for all stored listings and persist the updates.

    Loads every listing from ``repository``, schedules
    ``dump_images_for_listing`` for each one through ``tqdm.gather`` (which
    also renders a progress bar), and upserts the listings that were returned.

    Args:
        repository: Provides ``get_listings()`` and receives the updated
            listings through ``upsert_listings()``.
        image_base_path: Base directory passed on to
            ``dump_images_for_listing``.
    """
    stored_listings = await repository.get_listings()

    download_jobs = []
    for stored in stored_listings:
        download_jobs.append(dump_images_for_listing(stored, image_base_path))
    outcomes = await tqdm.gather(*download_jobs)

    # Entries that came back as None are dropped before the upsert.
    changed = [outcome for outcome in outcomes if outcome is not None]
    await repository.upsert_listings(changed)
|
||||
|
||||
|
||||
@retry(wait=wait_random(min=1, max=2), stop=stop_after_attempt(3))
async def _download_floorplan(url: str, destination: Path) -> bool:
    """Fetch one floorplan image and store it at ``destination``.

    The request is made while holding the module-level ``semaphore``. Any
    exception is logged through ``tqdm.write`` and re-raised so that the
    ``retry`` decorator can try again.

    Args:
        url: Address of the image to download.
        destination: File path the image is written to.

    Returns:
        ``True`` when the image was written, ``False`` when the server
        answered 404.

    Raises:
        Exception: When the response status is neither 200 nor 404, or when
            the request or the write fails.
    """
    try:
        async with semaphore:
            async with aiohttp.ClientSession() as session:
                async with session.get(url) as response:
                    if response.status == 404:
                        return False
                    if response.status != 200:
                        raise Exception(f"Error for {url}: {response.status}")
                    # Read the whole body before touching the filesystem so a
                    # failed transfer never leaves a truncated file behind.
                    payload = await response.read()

        destination.parent.mkdir(parents=True, exist_ok=True)
        # Write to a sibling temp file and rename it into place: the
        # ``exists()`` check in the caller must only ever see complete files.
        partial = destination.with_name(destination.name + ".part")
        partial.write_bytes(payload)
        partial.replace(destination)
        return True
    except Exception as e:
        tqdm.write(f"Error for {url}: {e}")
        raise  # re-raise so that we retry it


async def dump_images_for_listing(listing: Listing, base_path: Path) -> Listing | None:
    """Download every floorplan image of ``listing`` that is not on disk yet.

    Floorplan URLs are read from
    ``listing.additional_info["property"]["floorplans"]``. Each image is
    stored under ``<base_path>/<listing.id>/floorplans/<file name>`` and its
    path is appended to ``listing.floorplan_image_paths``. Images whose target
    file already exists are skipped, and an image that answers 404 is skipped
    without affecting the remaining ones.

    Args:
        listing: Listing whose floorplans should be downloaded; mutated in
            place.
        base_path: Root directory for the downloaded images.

    Returns:
        ``listing`` when at least one new image was stored during this call,
        otherwise ``None``.

    Raises:
        Exception: Whatever the retried download ultimately raises after its
            attempts are exhausted.
    """
    all_floorplans = listing.additional_info.get("property", {}).get("floorplans", [])
    stored_any = False
    for floorplan in all_floorplans:
        url = floorplan["url"]
        picname = url.split("/")[-1]
        floorplan_path = Path(base_path, str(listing.id), "floorplans", picname)
        if floorplan_path.exists():
            continue
        # Keep going after each image instead of returning on the first one,
        # so listings with several floorplans get all of them.
        if await _download_floorplan(url, floorplan_path):
            listing.floorplan_image_paths.append(str(floorplan_path))
            stored_any = True
    return listing if stored_any else None
|
||||
|
|
@ -1,43 +0,0 @@
|
|||
import asyncio
|
||||
from models import Listing
|
||||
from rec import floorplan
|
||||
from repositories.listing_repository import ListingRepository
|
||||
from tqdm.asyncio import tqdm
|
||||
import multiprocessing
|
||||
|
||||
|
||||
async def detect_floorplan(repository: ListingRepository):
    """Run floorplan OCR over the stored listings and persist the results.

    Loads every listing from ``repository``, runs ``_calculate_sqm_ocr`` for
    each one through ``tqdm.gather`` (which also renders a progress bar), and
    upserts the listings that were returned.

    Args:
        repository: Provides ``get_listings()`` and receives the updated
            listings through ``upsert_listings()``.
    """
    listings = await repository.get_listings()

    # Allow roughly a quarter of the cores to run OCR at once, but never fewer
    # than one: a semaphore created with 0 permits can never be acquired, so
    # on machines with fewer than four CPUs every task would wait forever.
    max_parallel_ocr = max(1, multiprocessing.cpu_count() // 4)
    semaphore = asyncio.Semaphore(max_parallel_ocr)

    results = await tqdm.gather(
        *[_calculate_sqm_ocr(listing, semaphore) for listing in listings]
    )
    # Entries that came back as None are dropped before the upsert.
    updated_listings = [listing for listing in results if listing is not None]
    await repository.upsert_listings(updated_listings)
|
||||
|
||||
|
||||
async def _calculate_sqm_ocr(
    listing: Listing, semaphore: asyncio.Semaphore
) -> Listing | None:
    """Fill in ``listing.square_meters`` from OCR on its floorplan images.

    Each path in ``listing.floorplan_image_paths`` is passed to
    ``floorplan.calculate_ocr`` in a worker thread while holding
    ``semaphore``. The largest non-``None`` estimate is stored on the listing.

    Args:
        listing: Listing to update; mutated in place.
        semaphore: Limits how many OCR calls run at the same time.

    Returns:
        ``None`` when the listing already has ``square_meters`` set,
        otherwise the updated ``listing``.
    """
    if listing.square_meters is not None:
        return None

    estimates = []
    for image_path in listing.floorplan_image_paths:
        async with semaphore:
            ocr_sqm, _ignored = await asyncio.to_thread(
                floorplan.calculate_ocr, image_path
            )
        if ocr_sqm is None:
            continue
        estimates.append(ocr_sqm)

    # try once, if we fail, keep as 0
    listing.square_meters = max(estimates, default=0)
    return listing
|
||||
Loading…
Add table
Add a link
Reference in a new issue