25 lines
641 B
Python
25 lines
641 B
Python
|
|
from vqa import Blip, MicrosoftGIT, PixStructDocVA, Vilt, Deplot, VQA
|
||
|
|
from PIL import Image
|
||
|
|
from typing import List
|
||
|
|
from questions import load_questions
|
||
|
|
|
||
|
|
# Floor plan image that every model below is queried against.
image = Image.open("floorplans/46001_32532509_FLP_00_0000.jpeg")

# Mapping of question -> expected answer (consumed via .items() in the loop).
# NOTE(review): the meaning of the positional `False` flag is defined by
# questions.load_questions -- confirm there.
questions = load_questions(False)

# Models to compare; uncomment an entry to include it in the run.
models: List[VQA] = [
    # Blip(),
    # Vilt(),
    # Deplot(),
    # PixStructDocVA(),
    MicrosoftGIT(),
]

for question, expected in questions.items():
    # Ask every enabled model the same question about the same image,
    # keyed by the model's name.
    answers = {model.name: model.query(image, question) for model in models}

    print("# Question:", question)
    for modelname, answer in answers.items():
        print(f"{modelname}: {answer}")
    # The reference answer lives under its own name (`expected`) so the
    # per-model loop variable `answer` cannot overwrite it before this print.
    print("Expected:", expected)
|