24 lines
605 B
Python
24 lines
605 B
Python
from vqa import MicrosoftGIT, VQA
|
|
from PIL import Image
|
|
from typing import List
|
|
from questions import load_questions
|
|
|
|
# The single floor-plan image that every question is asked about.
image = Image.open("floorplans/46001_32532509_FLP_00_0000.jpeg")

# Consumed below via `.items()` as (question, value) pairs, where the value
# is reported as the expected answer.
# NOTE(review): the meaning of the `False` flag is defined in `questions.py`
# and is not visible from this script -- confirm before changing it.
questions = load_questions(False)

# Models to compare. Only MicrosoftGIT is currently enabled; Blip(), Vilt(),
# Deplot() and PixStructDocVA() were previously listed here but are disabled.
models: List[VQA] = [MicrosoftGIT()]
|
|
|
|
# For each question, query every configured model on the image and print each
# model's answer followed by the expected answer.
for question, expected in questions.items():
    # Collect one answer per model, keyed by the model's name.
    answers = {model.name: model.query(image, question) for model in models}

    print("# Question:", question)
    for modelname, answer in answers.items():
        print(f"{modelname}: {answer}")
    # The expected value is bound to its own name: the inner loop rebinds
    # `answer`, so reusing that name here would print the last model's output
    # instead of the expected answer.
    print("Expected:", expected)