# Ask every enabled VQA model each question about one floorplan image and
# print each model's answer next to the expected answer.
from typing import List

from PIL import Image

from questions import load_questions
from vqa import Blip, MicrosoftGIT, PixStructDocVA, Vilt, Deplot, VQA

image = Image.open("floorplans/46001_32532509_FLP_00_0000.jpeg")

# Mapping of question -> expected answer (it is iterated with .items() below).
# NOTE(review): the meaning of the positional ``False`` flag is defined in
# ``questions.load_questions`` and is not visible here -- confirm.
questions = load_questions(False)

# Models under comparison; uncomment an entry to include it in the run.
models: List[VQA] = [
    # Blip(),
    # Vilt(),
    # Deplot(),
    # PixStructDocVA(),
    MicrosoftGIT(),
]

for question, expected in questions.items():
    # Query every enabled model with the same image/question pair.
    predictions = {model.name: model.query(image, question) for model in models}

    print("# Question:", question)
    # The prediction gets its own name: reusing the outer loop's variable here
    # would overwrite the expected answer before it is printed below.
    for modelname, prediction in predictions.items():
        print(f"{modelname}: {prediction}")
    print("Expected:", expected)