adding ruff auto check for pull requests as well as fixing all ruff errors (#1)

Co-authored-by: Kadir <git@k8n.dev>
This commit is contained in:
Kadir 2025-09-14 19:40:18 +01:00 committed by GitHub
parent b1e0a414cf
commit 4c23acdb55
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 60 additions and 10 deletions

39
.github/workflows/ruff.yml vendored Normal file
View file

@ -0,0 +1,39 @@
# Lints the Python files touched by a pull request with Ruff.
# The job succeeds without running Ruff when the PR changes no Python files.
name: Run Ruff and Auto-merge
on:
  pull_request:
    types: [opened, synchronize, reopened]
jobs:
  ruff-check:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Fetch all history for diffing
          fetch-depth: 0
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11' # Or your desired Python version
      - name: Install Ruff
        run: pip install ruff
      - name: Get changed files
        id: changed_files
        env:
          # Pass the SHAs through the environment instead of templating them
          # into the script text.
          BASE_SHA: ${{ github.event.pull_request.base.sha }}
          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
        run: |
          # Get a list of changed (non-deleted) files between the base and head commits of the PR
          git diff --name-only --diff-filter=d "$BASE_SHA" "$HEAD_SHA" > changed_files.txt
          # Filter for Python files. grep exits 1 when nothing matches, which
          # would abort the step under `bash -e`; tolerate exactly that case
          # while still failing on real grep errors (exit status 2).
          grep -E '\.py$' changed_files.txt > python_files.txt || [ $? -eq 1 ]
          # Remove newlines and join with spaces; only used to decide whether
          # the lint step below should run.
          echo "files=$(tr '\n' ' ' < python_files.txt)" >> "$GITHUB_OUTPUT"
      - name: Run Ruff on changed files
        if: steps.changed_files.outputs.files != ''
        run: |
          # Read the file names from disk rather than interpolating them into
          # the script: PR-controlled names could otherwise inject shell code,
          # and names containing spaces would be split into several arguments.
          xargs -d '\n' ruff check < python_files.txt

View file

@ -6,7 +6,8 @@ import logging
import pathlib
from typing import Any
from listing_processor import ListingProcessor
from rec.query import listing_query, QueryParameters
from rec.query import listing_query
from models.listing import QueryParameters
from rec.districts import get_districts
from repositories import ListingRepository
from tqdm.asyncio import tqdm
@ -35,7 +36,7 @@ async def dump_listings_full(
# logger.debug("Completed floorplan detection")
# refresh listings
listings = await repository.get_listings(parameters) # this can be better
new_listings = [l for l in listings if l.id in new_listings]
new_listings = [x for x in listings if x.id in new_listings]
return new_listings
@ -77,7 +78,7 @@ async def dump_listings(
listings.append(listing)
# if listing is already in db, do not fetch details again
all_listing_ids = [l.id for l in await repository.get_listings()]
all_listing_ids = [x.id for x in await repository.get_listings()]
missing_listing = [
listing for listing in listings if listing.identifier not in all_listing_ids
]
@ -88,7 +89,7 @@ async def dump_listings(
processed_listings = await tqdm.gather(
*[listing_processor.process_listing(id) for id in missing_ids]
)
filtered_listings = [l for l in processed_listings if l is not None]
filtered_listings = [x for x in processed_listings if x is not None]
return filtered_listings

View file

@ -24,7 +24,6 @@
"source": [
"from transformers import Pix2StructProcessor, Pix2StructForConditionalGeneration\n",
"from PIL import Image\n",
"import pandas as pd\n",
"import re"
]
},

View file

@ -1,4 +1,4 @@
from vqa import Blip, MicrosoftGIT, PixStructDocVA, Vilt, Deplot, VQA
from vqa import MicrosoftGIT, VQA
from PIL import Image
from typing import List
from questions import load_questions

View file

@ -1,18 +1,24 @@
from transformers import BlipProcessor, BlipForQuestionAnswering
from transformers import ViltProcessor, ViltForQuestionAnswering
from transformers import Pix2StructProcessor, Pix2StructForConditionalGeneration
from transformers import GitVisionConfig, GitVisionModel, AutoProcessor, GitProcessor
from transformers import GitVisionModel, GitProcessor
from abc import ABC, abstractmethod
from transformers.processing_utils import ProcessorMixin
class VQA:
class VQA(ABC):
name = "Not defined"
def query(image, question: str) -> str:
pass
@abstractmethod
def query(self, image, question: str) -> str:
return "Not implemented"
class Blip(VQA):
    """Visual question answering backed by Salesforce BLIP (capfilt-large)."""

    name = "Blip"

    def query(self, image, question):
        """Answer *question* about *image* and return the decoded answer text.

        The processor and model are loaded via ``from_pretrained`` on every
        call, exactly as before.
        """
        processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-capfilt-large")
        model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-capfilt-large")

        # Narrow the type for static checkers. The previous
        # `processor is ProcessorMixin` compared an instance with the class
        # object by identity, which is always False and made every call raise
        # AssertionError.
        assert isinstance(processor, ProcessorMixin)
        inputs = processor(image, question, return_tensors="pt")

        out = model.generate(max_new_tokens=50000, **inputs)
        return processor.decode(out[0], skip_special_tokens=True)
@ -25,6 +31,7 @@ class Vilt(VQA):
model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
# prepare inputs
assert processor is ProcessorMixin
encoding = processor(image, question, return_tensors="pt")
# forward pass
@ -41,6 +48,7 @@ class Deplot(VQA):
processor = Pix2StructProcessor.from_pretrained('google/deplot')
model = Pix2StructForConditionalGeneration.from_pretrained('google/deplot')
assert processor is ProcessorMixin
inputs = processor(images=image, text=question, return_tensors="pt")
predictions = model.generate(**inputs, max_new_tokens=512)
return processor.decode(predictions[0], skip_special_tokens=True)
@ -53,6 +61,7 @@ class PixStructDocVA(VQA):
model = Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-docvqa-large")
processor = Pix2StructProcessor.from_pretrained("google/pix2struct-docvqa-large")
assert processor is ProcessorMixin
inputs = processor(images=image, text=question, return_tensors="pt")
predictions = model.generate(**inputs, max_new_tokens=10000)
answer = processor.decode(predictions[0], skip_special_tokens=True)
@ -64,6 +73,8 @@ class MicrosoftGIT(VQA):
def query(self, image, question):
processor = GitProcessor.from_pretrained("microsoft/git-base")
model = GitVisionModel.from_pretrained("microsoft/git-base")
assert processor is ProcessorMixin
inputs = processor(images=image, text=question, return_tensors="pt")
predictions = model.generate(**inputs, max_new_tokens=10000)
answer = processor.decode(predictions[0], skip_special_tokens=True)