From ccef50b37112fc3b29f598593858a7a0b41ff6de Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sun, 1 Feb 2026 20:47:50 +0000 Subject: [PATCH 1/7] Trigger webhook --- .drone.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.drone.yml b/.drone.yml index 271575b..49af7fc 100644 --- a/.drone.yml +++ b/.drone.yml @@ -60,3 +60,4 @@ steps: - 'curl -X PATCH https://kubernetes:6443/apis/apps/v1/namespaces/realestate-crawler/deployments/realestate-crawler-api -H "Authorization:Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" -H "Content-Type:application/strategic-merge-patch+json" -k -d ''{"spec": {"template": {"metadata": { "annotations": {"kubectl.kubernetes.io/restartedAt": "''$(date +%Y-%m-%dT%TZ)''" }}}}}'' | head' # Sun Feb 1 08:29:56 PM UTC 2026 # Webhook test Sun Feb 1 08:45:52 PM UTC 2026 +# 1769978870 From 4969df87457b04f9791519d1d67a94b9e0a6461d Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sun, 1 Feb 2026 20:51:13 +0000 Subject: [PATCH 2/7] Test new webhook --- .drone.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.drone.yml b/.drone.yml index 49af7fc..601caf7 100644 --- a/.drone.yml +++ b/.drone.yml @@ -61,3 +61,4 @@ steps: # Sun Feb 1 08:29:56 PM UTC 2026 # Webhook test Sun Feb 1 08:45:52 PM UTC 2026 # 1769978870 +# test 1769979073 From fe01df0e7e00e46b5802b207866e89e543751126 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sun, 1 Feb 2026 20:52:23 +0000 Subject: [PATCH 3/7] delete dump images as it is renamed --- crawler/3_dump_images.py | 51 ---------------------------------------- 1 file changed, 51 deletions(-) delete mode 100644 crawler/3_dump_images.py diff --git a/crawler/3_dump_images.py b/crawler/3_dump_images.py deleted file mode 100644 index afc3fd5..0000000 --- a/crawler/3_dump_images.py +++ /dev/null @@ -1,51 +0,0 @@ -import asyncio -from pathlib import Path -import aiohttp -from repositories import ListingRepository -from tenacity import retry, stop_after_attempt, wait_random -from tqdm.asyncio import tqdm - -from models import Listing - -# Setting this too high either crashes rightmove or gets us blocked -semaphore = asyncio.Semaphore(5) - - -async def dump_images( - repository: ListingRepository, - image_base_path: Path = Path("data/rs/"), -): - listings = await repository.get_listings() - updated_listings = await tqdm.gather( - *[dump_images_for_listing(listing, image_base_path) for listing in listings] - ) - await repository.upsert_listings( - [listing for listing in updated_listings if listing is not None] - ) - - -@retry(wait=wait_random(min=1, max=2), stop=stop_after_attempt(3)) -async def dump_images_for_listing(listing: Listing, base_path: Path) -> Listing | None: - all_floorplans = listing.additional_info.get("property", {}).get("floorplans", []) - for floorplan in all_floorplans: - url = floorplan["url"] - picname = url.split("/")[-1] - floorplan_path = Path(base_path, str(listing.id), "floorplans", picname) - if floorplan_path.exists(): - continue - try: - async with semaphore: - async with aiohttp.ClientSession() as session: - async with session.get(url) as response: - if response.status == 404: - return None - if response.status != 200: - raise Exception(f"Error for {url}: {response.status}") - floorplan_path.parent.mkdir(parents=True, exist_ok=True) - with open(floorplan_path, "wb") as f: - f.write(await response.read()) - listing.floorplan_image_paths.append(str(floorplan_path)) - return listing - except Exception as e: - tqdm.write(f"Error for {url}: {e}") - raise e # raise so that we retry it From dc1186601aa3c51c6a33d57657094af87115693f Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sun, 1 Feb 2026 20:54:53 +0000 Subject: [PATCH 4/7] Final webhook test --- .drone.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.drone.yml b/.drone.yml index 601caf7..91acb8d 100644 --- a/.drone.yml +++ b/.drone.yml @@ -62,3 +62,4 @@ steps: # Webhook test Sun Feb 1 08:45:52 PM UTC 2026 # 1769978870 # test 1769979073 +# final test 1769979293 From f88bba032f6426ae4129116ecced3416891e5a7f Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sun, 1 Feb 2026 20:55:59 +0000 Subject: [PATCH 5/7] Test without secret --- .drone.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.drone.yml b/.drone.yml index 91acb8d..f4e62cb 100644 --- a/.drone.yml +++ b/.drone.yml @@ -63,3 +63,4 @@ steps: # 1769978870 # test 1769979073 # final test 1769979293 +# no secret test 1769979359 From 22aa9c86a70a729c631185cb5e12bfd316f39ab2 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sun, 1 Feb 2026 21:53:18 +0000 Subject: [PATCH 6/7] update drone.yaml to build on push --- .drone.yml | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/.drone.yml b/.drone.yml index f4e62cb..456e6f1 100644 --- a/.drone.yml +++ b/.drone.yml @@ -1,66 +1,56 @@ kind: pipeline type: kubernetes name: frontend -timeout: 30m trigger: - event: - - push - - cron branch: - - master + - master + event: + - push steps: - name: Build frontend image image: plugins/docker settings: - username: "viktorbarzin" + username: viktorbarzin password: from_secret: dockerhub-token repo: viktorbarzin/immoweb dockerfile: crawler/frontend/Dockerfile context: crawler/frontend - cache_from: "viktorbarzin/immoweb:latest" auto_tag: true + - name: Update deployment image: alpine commands: - - "apk add curl" + - apk add curl - 'curl -X PATCH https://kubernetes:6443/apis/apps/v1/namespaces/realestate-crawler/deployments/realestate-crawler-ui -H "Authorization:Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" -H "Content-Type:application/strategic-merge-patch+json" -k -d ''{"spec": {"template": {"metadata": { "annotations": {"kubectl.kubernetes.io/restartedAt": "''$(date +%Y-%m-%dT%TZ)''" }}}}}'' | head' --- kind: pipeline type: kubernetes name: api -timeout: 30m trigger: - event: - - push - - cron branch: - - master + - master + event: + - push steps: - - name: Build backend API image + - name: Build API image image: plugins/docker settings: - username: "viktorbarzin" + username: viktorbarzin password: from_secret: dockerhub-token repo: viktorbarzin/realestatecrawler dockerfile: crawler/Dockerfile context: crawler/ - cache_from: "viktorbarzin/realestatecrawler:latest" auto_tag: true + - name: Update deployment image: alpine commands: - - "apk add curl" + - apk add curl - 'curl -X PATCH https://kubernetes:6443/apis/apps/v1/namespaces/realestate-crawler/deployments/realestate-crawler-api -H "Authorization:Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" -H "Content-Type:application/strategic-merge-patch+json" -k -d ''{"spec": {"template": {"metadata": { "annotations": {"kubectl.kubernetes.io/restartedAt": "''$(date +%Y-%m-%dT%TZ)''" }}}}}'' | head' -# Sun Feb 1 08:29:56 PM UTC 2026 -# Webhook test Sun Feb 1 08:45:52 PM UTC 2026 -# 1769978870 -# test 1769979073 -# final test 1769979293 -# no secret test 1769979359 From 4b1e971edf30c083fe5385cb32454d96f16c33ff Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sun, 1 Feb 2026 21:54:15 +0000 Subject: [PATCH 7/7] delete detect floorplan as it was renamed --- crawler/4_detect_floorplan.py | 43 ----------------------------------- 1 file changed, 43 deletions(-) delete mode 100644 crawler/4_detect_floorplan.py diff --git a/crawler/4_detect_floorplan.py b/crawler/4_detect_floorplan.py deleted file mode 100644 index bf02f67..0000000 --- a/crawler/4_detect_floorplan.py +++ /dev/null @@ -1,43 +0,0 @@ -import asyncio -from models import Listing -from rec import floorplan -from repositories.listing_repository import ListingRepository -from tqdm.asyncio import tqdm -import multiprocessing - - -async def detect_floorplan(repository: ListingRepository): - listings = await repository.get_listings() - cpu_count = multiprocessing.cpu_count() // 4 - semaphore = asyncio.Semaphore(cpu_count) - - updated_listings = [ - listing - for listing in await tqdm.gather( - *[_calculate_sqm_ocr(listing, semaphore) for listing in listings] - ) - if listing is not None - ] - await repository.upsert_listings(updated_listings) - - -async def _calculate_sqm_ocr( - listing: Listing, semaphore: asyncio.Semaphore -) -> Listing | None: - if listing.square_meters is not None: - return None - sqms = [] - for floorplan_path in listing.floorplan_image_paths: - async with semaphore: - estimated_sqm, _ = await asyncio.to_thread( - floorplan.calculate_ocr, floorplan_path - ) - if estimated_sqm is not None: - sqms.append(estimated_sqm) - max_sqm = max(sqms, default=0) # try once, if we fail, keep as 0 - # if max_sqm is not None: - listing.square_meters = max_sqm - return listing - # else: - # listing.square_meters = None - # return None