wrongmove/vqa/main.py
2025-09-14 19:40:18 +01:00

24 lines
605 B
Python

from vqa import MicrosoftGIT, VQA
from PIL import Image
from typing import List
from questions import load_questions
# Script body: ask every configured VQA model each question about a single
# floorplan image and print the model answers next to the expected answer.

# The one image that all questions below are asked about.
image = Image.open("floorplans/46001_32532509_FLP_00_0000.jpeg")

# Iterated with .items() below, so this is a mapping; presumably
# question text -> expected answer.
# NOTE(review): the meaning of the False argument is defined by
# questions.load_questions -- confirm there.
questions = load_questions(False)

# Models to compare. The commented-out entries are disabled alternatives;
# only MicrosoftGIT and VQA are imported by this file, so re-enabling one of
# them also requires adding its import.
models: List[VQA] = [
    # Blip(),
    # Vilt(),
    # Deplot(),
    # PixStructDocVA(),
    MicrosoftGIT(),
]

for question, expected in questions.items():
    # One answer per model, keyed by the model's name.
    answers = {model.name: model.query(image, question) for model in models}
    print("# Question:", question)
    for modelname, prediction in answers.items():
        print(f"{modelname}: {prediction}")
    # The expected answer has its own name so the inner loop cannot rebind it;
    # previously both loops used `answer`, so this line printed the last
    # model's output instead of the expected value.
    print("Expected:", expected)