From 4c23acdb557652730dea7ff42a50e93ba4d926f8 Mon Sep 17 00:00:00 2001 From: Kadir Date: Sun, 14 Sep 2025 19:40:18 +0100 Subject: [PATCH] adding ruff auto check for pull requests as well as fixing all ruff errors (#1) Co-authored-by: Kadir --- .github/workflows/ruff.yml | 39 ++++++++++++++++++++++++++++++++++++++ crawler/1_dump_listings.py | 9 +++++---- vqa/Untitled.ipynb | 1 - vqa/main.py | 2 +- vqa/vqa.py | 19 +++++++++++++++---- 5 files changed, 60 insertions(+), 10 deletions(-) create mode 100644 .github/workflows/ruff.yml diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml new file mode 100644 index 0000000..e591c4e --- /dev/null +++ b/.github/workflows/ruff.yml @@ -0,0 +1,39 @@ +name: Run Ruff and Auto-merge + +on: + pull_request: + types: [opened, synchronize, reopened] + +jobs: + ruff-check: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + # Fetch all history for diffing + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' # Or your desired Python version + + - name: Install Ruff + run: pip install ruff + + - name: Get changed files + id: changed_files + run: | + # Get a list of changed files between the base and head commits of the PR + git diff --name-only --diff-filter=d ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} > changed_files.txt + # Filter for Python files (grep exits 1 on no match, which would fail the step under bash -e) + grep -E '\.py$' changed_files.txt > python_files.txt || true + # Remove newlines and join with spaces + echo "files=$(tr '\n' ' ' < python_files.txt)" >> $GITHUB_OUTPUT + + - name: Run Ruff on changed files + if: steps.changed_files.outputs.files != '' + run: | + # The ruff command will only run if there are Python files to check + ruff check ${{ steps.changed_files.outputs.files }} diff --git a/crawler/1_dump_listings.py b/crawler/1_dump_listings.py index 38f65cc..f31f1b5 100644 --- a/crawler/1_dump_listings.py +++
b/crawler/1_dump_listings.py @@ -6,7 +6,8 @@ import logging import pathlib from typing import Any from listing_processor import ListingProcessor -from rec.query import listing_query, QueryParameters +from rec.query import listing_query +from models.listing import QueryParameters from rec.districts import get_districts from repositories import ListingRepository from tqdm.asyncio import tqdm @@ -35,7 +36,7 @@ async def dump_listings_full( # logger.debug("Completed floorplan detection") # refresh listings listings = await repository.get_listings(parameters) # this can be better - new_listings = [l for l in listings if l.id in new_listings] + new_listings = [x for x in listings if x.id in new_listings] return new_listings @@ -77,7 +78,7 @@ async def dump_listings( listings.append(listing) # if listing is already in db, do not fetch details again - all_listing_ids = [l.id for l in await repository.get_listings()] + all_listing_ids = [x.id for x in await repository.get_listings()] missing_listing = [ listing for listing in listings if listing.identifier not in all_listing_ids ] @@ -88,7 +89,7 @@ async def dump_listings( processed_listings = await tqdm.gather( *[listing_processor.process_listing(id) for id in missing_ids] ) - filtered_listings = [l for l in processed_listings if l is not None] + filtered_listings = [x for x in processed_listings if x is not None] return filtered_listings diff --git a/vqa/Untitled.ipynb b/vqa/Untitled.ipynb index 9ee8422..0f1d18e 100644 --- a/vqa/Untitled.ipynb +++ b/vqa/Untitled.ipynb @@ -24,7 +24,6 @@ "source": [ "from transformers import Pix2StructProcessor, Pix2StructForConditionalGeneration\n", "from PIL import Image\n", - "import pandas as pd\n", "import re" ] }, diff --git a/vqa/main.py b/vqa/main.py index 31931ea..d0400d5 100644 --- a/vqa/main.py +++ b/vqa/main.py @@ -1,4 +1,4 @@ -from vqa import Blip, MicrosoftGIT, PixStructDocVA, Vilt, Deplot, VQA +from vqa import MicrosoftGIT, VQA from PIL import Image from typing import List 
from questions import load_questions diff --git a/vqa/vqa.py b/vqa/vqa.py index faad3c6..6bf7856 100644 --- a/vqa/vqa.py +++ b/vqa/vqa.py @@ -1,18 +1,24 @@ from transformers import BlipProcessor, BlipForQuestionAnswering from transformers import ViltProcessor, ViltForQuestionAnswering from transformers import Pix2StructProcessor, Pix2StructForConditionalGeneration -from transformers import GitVisionConfig, GitVisionModel, AutoProcessor, GitProcessor +from transformers import GitVisionModel, GitProcessor +from abc import ABC, abstractmethod +from transformers.processing_utils import ProcessorMixin -class VQA: + +class VQA(ABC): name = "Not defined" - def query(image, question: str) -> str: - pass + @abstractmethod + def query(self, image, question: str) -> str: + return "Not implemented" class Blip(VQA): name = "Blip" def query(self, image, question): processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-capfilt-large") model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-capfilt-large") + + assert isinstance(processor, ProcessorMixin) inputs = processor(image, question, return_tensors="pt") out = model.generate(max_new_tokens=50000, **inputs) return processor.decode(out[0], skip_special_tokens=True) @@ -25,6 +31,7 @@ class Vilt(VQA): model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa") # prepare inputs + assert isinstance(processor, ProcessorMixin) encoding = processor(image, question, return_tensors="pt") # forward pass @@ -41,6 +48,7 @@ class Deplot(VQA): processor = Pix2StructProcessor.from_pretrained('google/deplot') model = Pix2StructForConditionalGeneration.from_pretrained('google/deplot') + assert isinstance(processor, ProcessorMixin) inputs = processor(images=image, text=question, return_tensors="pt") predictions = model.generate(**inputs, max_new_tokens=512) return processor.decode(predictions[0], skip_special_tokens=True) @@ -53,6 +61,7 @@ class PixStructDocVA(VQA): model =
Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-docvqa-large") processor = Pix2StructProcessor.from_pretrained("google/pix2struct-docvqa-large") + assert isinstance(processor, ProcessorMixin) inputs = processor(images=image, text=question, return_tensors="pt") predictions = model.generate(**inputs, max_new_tokens=10000) answer = processor.decode(predictions[0], skip_special_tokens=True) @@ -64,6 +73,8 @@ class MicrosoftGIT(VQA): def query(self, image, question): processor = GitProcessor.from_pretrained("microsoft/git-base") model = GitVisionModel.from_pretrained("microsoft/git-base") + + assert isinstance(processor, ProcessorMixin) inputs = processor(images=image, text=question, return_tensors="pt") predictions = model.generate(**inputs, max_new_tokens=10000) answer = processor.decode(predictions[0], skip_special_tokens=True)