Compare commits

..

7 commits

Author SHA1 Message Date
Viktor Barzin
4b1e971edf
delete detect floorplan as it was renamed 2026-02-01 21:58:16 +00:00
Viktor Barzin
22aa9c86a7
update drone.yaml to build on push 2026-02-01 21:53:18 +00:00
Viktor Barzin
f88bba032f
Test without secret 2026-02-01 20:58:20 +00:00
Viktor Barzin
dc1186601a Final webhook test 2026-02-01 20:54:53 +00:00
Viktor Barzin
fe01df0e7e
delete dump images as it is renamed 2026-02-01 20:53:40 +00:00
Viktor Barzin
4969df8745 Test new webhook 2026-02-01 20:51:13 +00:00
Viktor Barzin
ccef50b371 Trigger webhook 2026-02-01 20:47:50 +00:00
3 changed files with 13 additions and 113 deletions

View file

@ -1,62 +1,56 @@
kind: pipeline
type: kubernetes
name: frontend
timeout: 30m
trigger:
event:
- push
- cron
branch:
- master
- master
event:
- push
steps:
- name: Build frontend image
image: plugins/docker
settings:
username: "viktorbarzin"
username: viktorbarzin
password:
from_secret: dockerhub-token
repo: viktorbarzin/immoweb
dockerfile: crawler/frontend/Dockerfile
context: crawler/frontend
cache_from: "viktorbarzin/immoweb:latest"
auto_tag: true
- name: Update deployment
image: alpine
commands:
- "apk add curl"
- apk add curl
- 'curl -X PATCH https://kubernetes:6443/apis/apps/v1/namespaces/realestate-crawler/deployments/realestate-crawler-ui -H "Authorization:Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" -H "Content-Type:application/strategic-merge-patch+json" -k -d ''{"spec": {"template": {"metadata": { "annotations": {"kubectl.kubernetes.io/restartedAt": "''$(date +%Y-%m-%dT%TZ)''" }}}}}'' | head'
---
kind: pipeline
type: kubernetes
name: api
timeout: 30m
trigger:
event:
- push
- cron
branch:
- master
- master
event:
- push
steps:
- name: Build backend API image
- name: Build API image
image: plugins/docker
settings:
username: "viktorbarzin"
username: viktorbarzin
password:
from_secret: dockerhub-token
repo: viktorbarzin/realestatecrawler
dockerfile: crawler/Dockerfile
context: crawler/
cache_from: "viktorbarzin/realestatecrawler:latest"
auto_tag: true
- name: Update deployment
image: alpine
commands:
- "apk add curl"
- apk add curl
- 'curl -X PATCH https://kubernetes:6443/apis/apps/v1/namespaces/realestate-crawler/deployments/realestate-crawler-api -H "Authorization:Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" -H "Content-Type:application/strategic-merge-patch+json" -k -d ''{"spec": {"template": {"metadata": { "annotations": {"kubectl.kubernetes.io/restartedAt": "''$(date +%Y-%m-%dT%TZ)''" }}}}}'' | head'
# Sun Feb 1 08:29:56 PM UTC 2026
# Webhook test Sun Feb 1 08:45:52 PM UTC 2026

View file

@ -1,51 +0,0 @@
import asyncio
from pathlib import Path
import aiohttp
from repositories import ListingRepository
from tenacity import retry, stop_after_attempt, wait_random
from tqdm.asyncio import tqdm
from models import Listing
# Upper bound on concurrent image downloads; acquired around each HTTP request
# in dump_images_for_listing.
# Setting this too high either crashes rightmove or gets us blocked
semaphore = asyncio.Semaphore(5)
async def dump_images(
    repository: ListingRepository,
    image_base_path: Path = Path("data/rs/"),
):
    """Fetch images for every stored listing and persist the updated listings.

    Args:
        repository: Source of the listings and sink for the updated ones.
        image_base_path: Directory handed to ``dump_images_for_listing`` as
            the root for stored images.

    All listings are processed concurrently through ``tqdm.gather``; results
    that come back as ``None`` are dropped before the upsert.
    """
    all_listings = await repository.get_listings()
    download_jobs = [
        dump_images_for_listing(item, image_base_path) for item in all_listings
    ]
    results = await tqdm.gather(*download_jobs)

    # Only listings that were actually returned by the worker are written back.
    changed = []
    for result in results:
        if result is not None:
            changed.append(result)
    await repository.upsert_listings(changed)
@retry(wait=wait_random(min=1, max=2), stop=stop_after_attempt(3))
async def dump_images_for_listing(listing: Listing, base_path: Path) -> Listing | None:
    """Download the floorplan images of ``listing`` that are not on disk yet.

    Every URL under ``listing.additional_info["property"]["floorplans"]`` is
    saved to ``<base_path>/<listing.id>/floorplans/<last URL segment>`` and the
    saved path is appended to ``listing.floorplan_image_paths``. Files that
    already exist are skipped, and so are URLs that answer 404.

    Args:
        listing: Listing whose floorplans should be fetched; mutated in place.
        base_path: Root directory under which the images are stored.

    Returns:
        ``listing`` if at least one new image was stored, otherwise ``None``
        (no floorplans, everything already on disk, or only 404 responses).

    Raises:
        Exception: For any response status other than 200/404, or any network
            or file-system error. The error is logged and re-raised so the
            ``retry`` decorator runs the whole call again.
    """
    all_floorplans = listing.additional_info.get("property", {}).get("floorplans", [])
    downloaded_any = False
    for floorplan in all_floorplans:
        url = floorplan["url"]
        picname = url.split("/")[-1]
        floorplan_path = Path(base_path, str(listing.id), "floorplans", picname)
        if floorplan_path.exists():
            continue
        try:
            async with semaphore:
                async with aiohttp.ClientSession() as session:
                    async with session.get(url) as response:
                        if response.status == 404:
                            # Image is gone upstream; keep going with the rest.
                            continue
                        if response.status != 200:
                            raise Exception(f"Error for {url}: {response.status}")
                        # Read the body BEFORE creating the file: a failed read
                        # must not leave an empty file behind, otherwise the
                        # exists() check above would skip it on the retry.
                        payload = await response.read()
            floorplan_path.parent.mkdir(parents=True, exist_ok=True)
            floorplan_path.write_bytes(payload)
            listing.floorplan_image_paths.append(str(floorplan_path))
            downloaded_any = True
        except Exception as e:
            tqdm.write(f"Error for {url}: {e}")
            raise  # re-raise so that the retry decorator tries again
    # Do not return from inside the loop: every missing floorplan is fetched,
    # not just the first one.
    return listing if downloaded_any else None

View file

@ -1,43 +0,0 @@
import asyncio
from models import Listing
from rec import floorplan
from repositories.listing_repository import ListingRepository
from tqdm.asyncio import tqdm
import multiprocessing
async def detect_floorplan(repository: ListingRepository):
    """Run the floorplan OCR over all stored listings and persist the results.

    Args:
        repository: Source of the listings and sink for the updated ones.

    Each listing is handed to ``_calculate_sqm_ocr`` together with a shared
    semaphore that bounds the number of simultaneous OCR jobs. Results that
    are ``None`` are dropped; the remaining listings are upserted.
    """
    listings = await repository.get_listings()
    # Use a quarter of the CPUs, but never fewer than one: on hosts with less
    # than 4 CPUs the integer division yields 0, and a Semaphore(0) can never
    # be acquired, which would hang the whole run.
    worker_limit = max(1, multiprocessing.cpu_count() // 4)
    semaphore = asyncio.Semaphore(worker_limit)
    results = await tqdm.gather(
        *[_calculate_sqm_ocr(listing, semaphore) for listing in listings]
    )
    updated_listings = [listing for listing in results if listing is not None]
    await repository.upsert_listings(updated_listings)
async def _calculate_sqm_ocr(
    listing: Listing, semaphore: asyncio.Semaphore
) -> Listing | None:
    """Fill in ``listing.square_meters`` from its floorplan images.

    Args:
        listing: Listing to process; mutated in place.
        semaphore: Held while each ``floorplan.calculate_ocr`` call runs in a
            worker thread.

    Returns:
        ``None`` when ``listing.square_meters`` is already set; otherwise the
        listing, with ``square_meters`` set to the largest estimate found
        (``0`` when no image produced one).
    """
    if listing.square_meters is not None:
        return None

    async def estimate_for(image_path):
        # Only the first element of the calculate_ocr result is used.
        async with semaphore:
            value, _ = await asyncio.to_thread(floorplan.calculate_ocr, image_path)
        return value

    estimates = []
    for image_path in listing.floorplan_image_paths:
        value = await estimate_for(image_path)
        if value is None:
            continue
        estimates.append(value)

    # Falling back to 0 (rather than None) means the early return above fires
    # on the next run, so a listing is only attempted once.
    listing.square_meters = max(estimates, default=0)
    return listing