From 4c23acdb557652730dea7ff42a50e93ba4d926f8 Mon Sep 17 00:00:00 2001 From: Kadir Date: Sun, 14 Sep 2025 19:40:18 +0100 Subject: [PATCH] adding ruff auto check for pull requests as well as fixing all ruff errors (#1) Co-authored-by: Kadir --- .github/workflows/ruff.yml | 39 ++++++++++++++++++++++++++++++++++++++ crawler/1_dump_listings.py | 9 +++++---- vqa/Untitled.ipynb | 1 - vqa/main.py | 2 +- vqa/vqa.py | 19 +++++++++++++++---- 5 files changed, 60 insertions(+), 10 deletions(-) create mode 100644 .github/workflows/ruff.yml diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml new file mode 100644 index 0000000..e591c4e --- /dev/null +++ b/.github/workflows/ruff.yml @@ -0,0 +1,39 @@ +name: Run Ruff and Auto-merge + +on: + pull_request: + types: [opened, synchronize, reopened] + +jobs: + ruff-check: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + # Fetch all history for diffing + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' # Or your desired Python version + + - name: Install Ruff + run: pip install ruff + + - name: Get changed files + id: changed_files + run: | + # Get a list of changed files between the base and head commits of the PR + git diff --name-only --diff-filter=d ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} > changed_files.txt + # Filter for Python files (grep exits 1 on no match, which would fail the step under bash -e) + grep -E '\.py$' changed_files.txt > python_files.txt || true + # Remove newlines and join with spaces + echo "files=$(tr '\n' ' ' < python_files.txt)" >> $GITHUB_OUTPUT + + - name: Run Ruff on changed files + if: steps.changed_files.outputs.files != '' + run: | + # The ruff command will only run if there are Python files to check + ruff check ${{ steps.changed_files.outputs.files }} diff --git a/crawler/1_dump_listings.py b/crawler/1_dump_listings.py index 38f65cc..f31f1b5 100644 --- a/crawler/1_dump_listings.py +++
b/crawler/1_dump_listings.py @@ -6,7 +6,8 @@ import logging import pathlib from typing import Any from listing_processor import ListingProcessor -from rec.query import listing_query, QueryParameters +from rec.query import listing_query +from models.listing import QueryParameters from rec.districts import get_districts from repositories import ListingRepository from tqdm.asyncio import tqdm @@ -35,7 +36,7 @@ async def dump_listings_full( # logger.debug("Completed floorplan detection") # refresh listings listings = await repository.get_listings(parameters) # this can be better - new_listings = [l for l in listings if l.id in new_listings] + new_listings = [x for x in listings if x.id in new_listings] return new_listings @@ -77,7 +78,7 @@ async def dump_listings( listings.append(listing) # if listing is already in db, do not fetch details again - all_listing_ids = [l.id for l in await repository.get_listings()] + all_listing_ids = [x.id for x in await repository.get_listings()] missing_listing = [ listing for listing in listings if listing.identifier not in all_listing_ids ] @@ -88,7 +89,7 @@ async def dump_listings( processed_listings = await tqdm.gather( *[listing_processor.process_listing(id) for id in missing_ids] ) - filtered_listings = [l for l in processed_listings if l is not None] + filtered_listings = [x for x in processed_listings if x is not None] return filtered_listings diff --git a/vqa/Untitled.ipynb b/vqa/Untitled.ipynb index 9ee8422..0f1d18e 100644 --- a/vqa/Untitled.ipynb +++ b/vqa/Untitled.ipynb @@ -24,7 +24,6 @@ "source": [ "from transformers import Pix2StructProcessor, Pix2StructForConditionalGeneration\n", "from PIL import Image\n", - "import pandas as pd\n", "import re" ] }, diff --git a/vqa/main.py b/vqa/main.py index 31931ea..d0400d5 100644 --- a/vqa/main.py +++ b/vqa/main.py @@ -1,4 +1,4 @@ -from vqa import Blip, MicrosoftGIT, PixStructDocVA, Vilt, Deplot, VQA +from vqa import MicrosoftGIT, VQA from PIL import Image from typing import List 
from questions import load_questions diff --git a/vqa/vqa.py b/vqa/vqa.py index faad3c6..6bf7856 100644 --- a/vqa/vqa.py +++ b/vqa/vqa.py @@ -1,18 +1,24 @@ from transformers import BlipProcessor, BlipForQuestionAnswering from transformers import ViltProcessor, ViltForQuestionAnswering from transformers import Pix2StructProcessor, Pix2StructForConditionalGeneration -from transformers import GitVisionConfig, GitVisionModel, AutoProcessor, GitProcessor +from transformers import GitVisionModel, GitProcessor +from abc import ABC, abstractmethod +from transformers.processing_utils import ProcessorMixin -class VQA: + +class VQA(ABC): name = "Not defined" - def query(image, question: str) -> str: - pass + @abstractmethod + def query(self, image, question: str) -> str: + return "Not implemented" class Blip(VQA): name = "Blip" def query(self, image, question): processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-capfilt-large") model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-capfilt-large") + + assert isinstance(processor, ProcessorMixin) inputs = processor(image, question, return_tensors="pt") out = model.generate(max_new_tokens=50000, **inputs) return processor.decode(out[0], skip_special_tokens=True) @@ -25,6 +31,7 @@ class Vilt(VQA): model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa") # prepare inputs + assert isinstance(processor, ProcessorMixin) encoding = processor(image, question, return_tensors="pt") # forward pass @@ -41,6 +48,7 @@ class Deplot(VQA): processor = Pix2StructProcessor.from_pretrained('google/deplot') model = Pix2StructForConditionalGeneration.from_pretrained('google/deplot') + assert isinstance(processor, ProcessorMixin) inputs = processor(images=image, text=question, return_tensors="pt") predictions = model.generate(**inputs, max_new_tokens=512) return processor.decode(predictions[0], skip_special_tokens=True) @@ -53,6 +61,7 @@ class PixStructDocVA(VQA): model =
Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-docvqa-large") processor = Pix2StructProcessor.from_pretrained("google/pix2struct-docvqa-large") + assert isinstance(processor, ProcessorMixin) inputs = processor(images=image, text=question, return_tensors="pt") predictions = model.generate(**inputs, max_new_tokens=10000) answer = processor.decode(predictions[0], skip_special_tokens=True) @@ -64,6 +73,8 @@ class MicrosoftGIT(VQA): def query(self, image, question): processor = GitProcessor.from_pretrained("microsoft/git-base") model = GitVisionModel.from_pretrained("microsoft/git-base") + + assert isinstance(processor, ProcessorMixin) inputs = processor(images=image, text=question, return_tensors="pt") predictions = model.generate(**inputs, max_new_tokens=10000) answer = processor.decode(predictions[0], skip_special_tokens=True)