Compare commits
7 commits
1680cda7b7
...
4b1e971edf
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4b1e971edf | ||
|
|
22aa9c86a7 | ||
|
|
f88bba032f | ||
|
|
dc1186601a | ||
|
|
fe01df0e7e | ||
|
|
4969df8745 | ||
|
|
ccef50b371 |
3 changed files with 13 additions and 113 deletions
32
.drone.yml
32
.drone.yml
|
|
@ -1,62 +1,56 @@
|
||||||
kind: pipeline
|
kind: pipeline
|
||||||
type: kubernetes
|
type: kubernetes
|
||||||
name: frontend
|
name: frontend
|
||||||
timeout: 30m
|
|
||||||
|
|
||||||
trigger:
|
trigger:
|
||||||
event:
|
|
||||||
- push
|
|
||||||
- cron
|
|
||||||
branch:
|
branch:
|
||||||
- master
|
- master
|
||||||
|
event:
|
||||||
|
- push
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Build frontend image
|
- name: Build frontend image
|
||||||
image: plugins/docker
|
image: plugins/docker
|
||||||
settings:
|
settings:
|
||||||
username: "viktorbarzin"
|
username: viktorbarzin
|
||||||
password:
|
password:
|
||||||
from_secret: dockerhub-token
|
from_secret: dockerhub-token
|
||||||
repo: viktorbarzin/immoweb
|
repo: viktorbarzin/immoweb
|
||||||
dockerfile: crawler/frontend/Dockerfile
|
dockerfile: crawler/frontend/Dockerfile
|
||||||
context: crawler/frontend
|
context: crawler/frontend
|
||||||
cache_from: "viktorbarzin/immoweb:latest"
|
|
||||||
auto_tag: true
|
auto_tag: true
|
||||||
|
|
||||||
- name: Update deployment
|
- name: Update deployment
|
||||||
image: alpine
|
image: alpine
|
||||||
commands:
|
commands:
|
||||||
- "apk add curl"
|
- apk add curl
|
||||||
- 'curl -X PATCH https://kubernetes:6443/apis/apps/v1/namespaces/realestate-crawler/deployments/realestate-crawler-ui -H "Authorization:Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" -H "Content-Type:application/strategic-merge-patch+json" -k -d ''{"spec": {"template": {"metadata": { "annotations": {"kubectl.kubernetes.io/restartedAt": "''$(date +%Y-%m-%dT%TZ)''" }}}}}'' | head'
|
- 'curl -X PATCH https://kubernetes:6443/apis/apps/v1/namespaces/realestate-crawler/deployments/realestate-crawler-ui -H "Authorization:Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" -H "Content-Type:application/strategic-merge-patch+json" -k -d ''{"spec": {"template": {"metadata": { "annotations": {"kubectl.kubernetes.io/restartedAt": "''$(date +%Y-%m-%dT%TZ)''" }}}}}'' | head'
|
||||||
|
|
||||||
---
|
---
|
||||||
kind: pipeline
|
kind: pipeline
|
||||||
type: kubernetes
|
type: kubernetes
|
||||||
name: api
|
name: api
|
||||||
timeout: 30m
|
|
||||||
|
|
||||||
trigger:
|
trigger:
|
||||||
event:
|
|
||||||
- push
|
|
||||||
- cron
|
|
||||||
branch:
|
branch:
|
||||||
- master
|
- master
|
||||||
|
event:
|
||||||
|
- push
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Build backend API image
|
- name: Build API image
|
||||||
image: plugins/docker
|
image: plugins/docker
|
||||||
settings:
|
settings:
|
||||||
username: "viktorbarzin"
|
username: viktorbarzin
|
||||||
password:
|
password:
|
||||||
from_secret: dockerhub-token
|
from_secret: dockerhub-token
|
||||||
repo: viktorbarzin/realestatecrawler
|
repo: viktorbarzin/realestatecrawler
|
||||||
dockerfile: crawler/Dockerfile
|
dockerfile: crawler/Dockerfile
|
||||||
context: crawler/
|
context: crawler/
|
||||||
cache_from: "viktorbarzin/realestatecrawler:latest"
|
|
||||||
auto_tag: true
|
auto_tag: true
|
||||||
|
|
||||||
- name: Update deployment
|
- name: Update deployment
|
||||||
image: alpine
|
image: alpine
|
||||||
commands:
|
commands:
|
||||||
- "apk add curl"
|
- apk add curl
|
||||||
- 'curl -X PATCH https://kubernetes:6443/apis/apps/v1/namespaces/realestate-crawler/deployments/realestate-crawler-api -H "Authorization:Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" -H "Content-Type:application/strategic-merge-patch+json" -k -d ''{"spec": {"template": {"metadata": { "annotations": {"kubectl.kubernetes.io/restartedAt": "''$(date +%Y-%m-%dT%TZ)''" }}}}}'' | head'
|
- 'curl -X PATCH https://kubernetes:6443/apis/apps/v1/namespaces/realestate-crawler/deployments/realestate-crawler-api -H "Authorization:Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" -H "Content-Type:application/strategic-merge-patch+json" -k -d ''{"spec": {"template": {"metadata": { "annotations": {"kubectl.kubernetes.io/restartedAt": "''$(date +%Y-%m-%dT%TZ)''" }}}}}'' | head'
|
||||||
# Sun Feb 1 08:29:56 PM UTC 2026
|
|
||||||
# Webhook test Sun Feb 1 08:45:52 PM UTC 2026
|
|
||||||
|
|
|
||||||
|
|
@ -1,51 +0,0 @@
|
||||||
import asyncio
|
|
||||||
from pathlib import Path
|
|
||||||
import aiohttp
|
|
||||||
from repositories import ListingRepository
|
|
||||||
from tenacity import retry, stop_after_attempt, wait_random
|
|
||||||
from tqdm.asyncio import tqdm
|
|
||||||
|
|
||||||
from models import Listing
|
|
||||||
|
|
||||||
# Setting this too high either crashes rightmove or gets us blocked
|
|
||||||
semaphore = asyncio.Semaphore(5)
|
|
||||||
|
|
||||||
|
|
||||||
async def dump_images(
    repository: ListingRepository,
    image_base_path: Path = Path("data/rs/"),
):
    """Download floorplan images for every listing and persist the results.

    All listings are read from ``repository``, one
    ``dump_images_for_listing`` coroutine is scheduled per listing under
    ``image_base_path``, and the coroutines are awaited together through
    ``tqdm.gather``. Listings for which the worker returned ``None`` are
    dropped; the remaining ones are written back with
    ``repository.upsert_listings``.
    """
    pending = [
        dump_images_for_listing(item, image_base_path)
        for item in await repository.get_listings()
    ]
    results = await tqdm.gather(*pending)

    # Workers return None when they have nothing to report for a listing.
    changed = [item for item in results if item is not None]
    await repository.upsert_listings(changed)
|
|
||||||
|
|
||||||
|
|
||||||
@retry(wait=wait_random(min=1, max=2), stop=stop_after_attempt(3))
async def _download_floorplan(url: str, destination: Path) -> bool:
    """Download a single floorplan image to ``destination``.

    Returns ``True`` when the image was saved and ``False`` when the server
    answered 404. Any other failure is reported through ``tqdm.write`` and
    re-raised so that the ``retry`` decorator can attempt the download again
    (up to three attempts, with a random 1-2 second wait between them).
    """
    try:
        # The module-level semaphore bounds the number of concurrent requests.
        async with semaphore:
            async with aiohttp.ClientSession() as session:
                async with session.get(url) as response:
                    if response.status == 404:
                        return False
                    if response.status != 200:
                        raise Exception(f"Error for {url}: {response.status}")
                    # Read the body before touching the filesystem: creating
                    # the file first would leave an empty file behind if the
                    # read failed, and the exists() check in the caller would
                    # then treat it as already downloaded.
                    payload = await response.read()
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_bytes(payload)
        return True
    except Exception as e:
        tqdm.write(f"Error for {url}: {e}")
        raise  # re-raise so that we retry it


async def dump_images_for_listing(listing: Listing, base_path: Path) -> Listing | None:
    """Download every floorplan image of ``listing`` that is not on disk yet.

    Floorplan URLs are read from
    ``listing.additional_info["property"]["floorplans"]``; each image is
    stored at ``<base_path>/<listing.id>/floorplans/<last URL segment>``.
    Files that already exist are skipped. A floorplan answering 404 is
    skipped as well, so one dead link no longer prevents the remaining
    floorplans of the listing from being fetched.

    Returns the listing (with the new paths appended to
    ``listing.floorplan_image_paths``) when at least one image was saved,
    otherwise ``None``. A download that keeps failing propagates the retry
    error raised by ``_download_floorplan``.
    """
    all_floorplans = listing.additional_info.get("property", {}).get("floorplans", [])
    saved_any = False
    for floorplan in all_floorplans:
        url = floorplan["url"]
        picname = url.split("/")[-1]
        floorplan_path = Path(base_path, str(listing.id), "floorplans", picname)
        if floorplan_path.exists():
            continue
        if await _download_floorplan(url, floorplan_path):
            listing.floorplan_image_paths.append(str(floorplan_path))
            saved_any = True
    return listing if saved_any else None
|
|
||||||
|
|
@ -1,43 +0,0 @@
|
||||||
import asyncio
|
|
||||||
from models import Listing
|
|
||||||
from rec import floorplan
|
|
||||||
from repositories.listing_repository import ListingRepository
|
|
||||||
from tqdm.asyncio import tqdm
|
|
||||||
import multiprocessing
|
|
||||||
|
|
||||||
|
|
||||||
async def detect_floorplan(repository: ListingRepository):
    """Estimate square meters for listings via OCR and persist the results.

    All listings are read from ``repository`` and one ``_calculate_sqm_ocr``
    coroutine is scheduled per listing; the coroutines share a semaphore
    sized to a quarter of the CPU count. Listings for which the worker
    returned ``None`` are dropped, and the rest are written back with
    ``repository.upsert_listings``.
    """
    listings = await repository.get_listings()

    # cpu_count() // 4 is 0 on machines with fewer than four cores, and a
    # Semaphore(0) would block every OCR task forever; always allow one.
    worker_limit = max(1, multiprocessing.cpu_count() // 4)
    semaphore = asyncio.Semaphore(worker_limit)

    results = await tqdm.gather(
        *[_calculate_sqm_ocr(listing, semaphore) for listing in listings]
    )
    updated_listings = [listing for listing in results if listing is not None]
    await repository.upsert_listings(updated_listings)
|
|
||||||
|
|
||||||
|
|
||||||
async def _calculate_sqm_ocr(
    listing: Listing, semaphore: asyncio.Semaphore
) -> Listing | None:
    """Fill in ``listing.square_meters`` from OCR on its floorplan images.

    Returns ``None`` without doing any work when ``square_meters`` is already
    set. Otherwise ``floorplan.calculate_ocr`` is run in a worker thread for
    each path in ``listing.floorplan_image_paths`` (one at a time, each call
    guarded by ``semaphore``), the largest non-``None`` estimate is stored on
    the listing, and the listing is returned.
    """
    if listing.square_meters is not None:
        return None

    estimates = []
    for image_path in listing.floorplan_image_paths:
        async with semaphore:
            sqm, _ = await asyncio.to_thread(floorplan.calculate_ocr, image_path)
        if sqm is None:
            continue
        estimates.append(sqm)

    # With no usable estimate the value becomes 0 rather than None, so the
    # guard at the top skips this listing on later runs (OCR is tried once).
    listing.square_meters = max(estimates, default=0)
    return listing
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue