{
- "headers": [
- "Model",
- "Type",
- "General",
- "Hallucination",
- "Reasoning",
- "Overall Consistency",
- "Macro Average",
- "Open Source?",
- "Model Size"
- "data": [
- [
- "<a href="https://huggingface.co/Skywork/Skywork-VL-Reward-7B" target="_blank">Skywork/Skywork-VL-Reward-7B</a>",
- "Seq. Classifier",
- "65.6%",
- "80.2%",
- "61.3%",
- "73.3%",
- "69.0%",
- "Yes",
- "7-13B"
- [
- "<a href="https://ai.google.dev/gemini-api/docs/models/gemini#gemini-2.0-flash" target="_blank">Gemini-2.0-flash-exp (2024-12)</a>",
- "Generative",
- "50.8%",
- "72.6%",
- "70.1%",
- "68.8%",
- "64.5%",
- "No",
- "Unknown"
- [
- "<a href="https://ai.google.dev/gemini-api/docs/models/gemini#gemini-1.5-pro" target="_blank">Gemini-1.5-Pro (2024-09-24)</a>",
- "Generative",
- "50.8%",
- "72.5%",
- "64.2%",
- "67.2%",
- "62.5%",
- "No",
- "Unknown"
- [
- "<a href="https://platform.openai.com/docs/models/gpt-4o" target="_blank">GPT-4o (2024-08-06)</a>",
- "Generative",
- "49.1%",
- "67.6%",
- "70.5%",
- "65.8%",
- "62.4%",
- "No",
- "Unknown"
- [
- "<a href="https://ai.google.dev/gemini-api/docs/models/gemini#gemini-1.5-flash" target="_blank">Gemini-1.5-Flash (2024-09-24)</a>",
- "Generative",
- "47.8%",
- "59.6%",
- "58.4%",
- "57.6%",
- "55.3%",
- "No",
- "Unknown"
- [
- "<a href="https://huggingface.co/meta-llama/Llama-3.2-90B-Vision-Instruct" target="_blank">meta-llama/Llama-3.2-90B-Vision-Instruct</a>",
- "Generative",
- "42.6%",
- "57.3%",
- "61.7%",
- "56.2%",
- "53.9%",
- "Yes",
- ">13B"
- [
- "<a href="https://www.anthropic.com/news/claude-3-5-sonnet" target="_blank">Claude-3.5-Sonnet (2024-06-22)</a>",
- "Generative",
- "43.4%",
- "55.0%",
- "62.3%",
- "55.3%",
- "53.6%",
- "No",
- "Unknown"
- [
- "<a href="https://cloud.siliconflow.cn/open/models?target=TeleAI/TeleMM" target="_blank">TeleAI/TeleMM</a>",
- "Generative",
- "47.1%",
- "38.9%",
- "46.1%",
- "54.9%",
- "44.0%",
- "No",
- "Unknown"
- [
- "<a href="https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-qianwen-vl-plus-api" target="_blank">qwen-vl-max (2024-11-19)</a>",
- "Generative",
- "40.6%",
- "46.0%",
- "57.6%",
- "48.2%",
- "48.1%",
- "No",
- "Unknown"
- [
- "<a href="https://huggingface.co/Qwen/QVQ-72B-Preview" target="_blank">Qwen/QVQ-72B-Preview</a>",
- "Generative",
- "41.8%",
- "46.2%",
- "51.2%",
- "46.4%",
- "46.4%",
- "Yes",
- ">13B"
- [
- "<a href="https://huggingface.co/MAmmoTH-VL/MAmmoTH-VL-8B-SI" target="_blank">MAmmoTH-VL/MAmmoTH-VL-8B-SI</a>",
- "Generative",
- "42.0%",
- "41.0%",
- "53.0%",
- "45.2%",
- "45.3%",
- "Yes",
- "7-13B"
- [
- "<a href="https://huggingface.co/OpenGVLab/InternVL2-8B" target="_blank">OpenGVLab/InternVL2-8B</a>",
- "Generative",
- "35.6%",
- "41.1%",
- "59.0%",
- "44.5%",
- "45.2%",
- "Yes",
- "7-13B"
- [
- "<a href="https://huggingface.co/allenai/Molmo-72B-0924" target="_blank">allenai/Molmo-72B-0924</a>",
- "Generative",
- "33.9%",
- "42.3%",
- "54.9%",
- "44.1%",
- "43.7%",
- "Yes",
- ">13B"
- [
- "<a href="https://huggingface.co/OpenGVLab/InternVL2-26B" target="_blank">OpenGVLab/InternVL2-26B</a>",
- "Generative",
- "39.3%",
- "36.9%",
- "60.8%",
- "43.2%",
- "45.7%",
- "Yes",
- ">13B"
- [
- "<a href="https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct" target="_blank">meta-llama/Llama-3.2-11B-Vision-Instruct</a>",
- "Generative",
- "33.3%",
- "38.4%",
- "56.6%",
- "42.9%",
- "42.8%",
- "Yes",
- "7-13B"
- [
- "<a href="https://huggingface.co/MAmmoTH-VL/MAmmoTH-VL-8B" target="_blank">MAmmoTH-VL/MAmmoTH-VL-8B</a>",
- "Generative",
- "36.0%",
- "40.0%",
- "52.0%",
- "42.2%",
- "42.7%",
- "Yes",
- "7-13B"
- [
- "<a href="https://platform.openai.com/docs/models/gpt-4o-mini" target="_blank">GPT-4o-mini (2024-07-18)</a>",
- "Generative",
- "41.7%",
- "34.5%",
- "58.2%",
- "41.5%",
- "44.8%",
- "No",
- "Unknown"
- [
- "<a href="https://huggingface.co/rhymes-ai/Aria" target="_blank">rhymes-ai/Aria</a>",
- "Generative",
- "37.9%",
- "33.1%",
- "64.0%",
- "41.1%",
- "45.0%",
- "Yes",
- "<7B"
- [
- "<a href="https://huggingface.co/nvidia/NVLM-D-72B" target="_blank">nvidia/NVLM-D-72B</a>",
- "Generative",
- "38.9%",
- "31.6%",
- "62.0%",
- "40.1%",
- "44.2%",
- "Yes",
- ">13B"
- [
- "<a href="https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct" target="_blank">Qwen/Qwen2-VL-72B-Instruct</a>",
- "Generative",
- "38.1%",
- "32.8%",
- "58.0%",
- "39.5%",
- "43.0%",
- "Yes",
- ">13B"
- [
- "<a href="https://huggingface.co/allenai/Molmo-7B-D-0924" target="_blank">allenai/Molmo-7B-D-0924</a>",
- "Generative",
- "31.1%",
- "31.8%",
- "56.2%",
- "37.5%",
- "39.7%",
- "Yes",
- "7-13B"
- [
- "<a href="https://huggingface.co/mistralai/Pixtral-12B-2409" target="_blank">mistralai/Pixtral-12B-2409</a>",
- "Generative",
- "35.6%",
- "25.9%",
- "59.9%",
- "35.8%",
- "40.5%",
- "Yes",
- "7-13B"
- [
- "<a href="https://huggingface.co/deepseek-ai/deepseek-vl2" target="_blank">deepseek-ai/deepseek-vl2</a>",
- "Generative",
- "29.7%",
- "23.8%",
- "50.9%",
- "30.3%",
- "34.8%",
- "Yes",
- "<7B"
- [
- "<a href="https://hf.co/lmms-lab/llava-onevision-qwen2-7b-ov" target="_blank">lmms-lab/llava-onevision-qwen2-7b-ov</a>",
- "Generative",
- "32.2%",
- "20.1%",
- "57.1%",
- "29.6%",
- "36.5%",
- "Yes",
- "7-13B"
- [
- "<a href="https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct" target="_blank">Qwen/Qwen2-VL-7B-Instruct</a>",
- "Generative",
- "31.6%",
- "19.1%",
- "51.1%",
- "28.3%",
- "33.9%",
- "Yes",
- "7-13B"
- [
- "<a href="https://huggingface.co/microsoft/Phi-3.5-vision-instruct" target="_blank">microsoft/Phi-3.5-vision-instruct</a>",
- "Generative",
- "28.0%",
- "22.4%",
- "56.6%",
- "28.2%",
- "35.7%",
- "Yes",
- "<7B"
- [
- "metadata": null