🤐 Do Chatbot LLMs Talk Too Much?
YapBench Benchmark
Measuring how wordy (length‑biased) LLMs are when a short answer would do.
tabularis.ai | Total models: 76 | Last Update: 31 December 2025
{
  • "headers": [
    • "Rank",
    • "model",
    • "YapIndex",
    • "Cat A",
    • "Cat B",
    • "Cat C",
    • "YapTax$"
    ],
  • "data": [
    • [
      • "🥇",
      • "<a target="_blank" href="https://platform.openai.com/docs/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">openai/gpt-3.5-turbo</a>",
      • "<span style="display:none">000022.7</span>22.7<span style="font-size:0.75em;color:#888;"> ±4.8</span>",
      • "18.0<span style="font-size:0.75em;color:#888;"> ±8.5</span>",
      • "36.0<span style="font-size:0.75em;color:#888;"> ±6.3</span>",
      • "14.0<span style="font-size:0.75em;color:#888;"> ±7.5</span>",
      • 0.02
      ],
    • [
      • "🥈",
      • "<a target="_blank" href="https://huggingface.co/moonshotai/kimi-k2-0905" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">moonshotai/kimi-k2-0905</a>",
      • "<span style="display:none">000044.7</span>44.7<span style="font-size:0.75em;color:#888;"> ±4.8</span>",
      • "81.0<span style="font-size:0.75em;color:#888;"> ±11.5</span>",
      • "38.5<span style="font-size:0.75em;color:#888;"> ±5.0</span>",
      • "14.5<span style="font-size:0.75em;color:#888;"> ±4.2</span>",
      • 0.05
      ],
    • [
      • "🥉",
      • "<a target="_blank" href="https://platform.openai.com/docs/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">openai/gpt-4</a>",
      • "<span style="display:none">000051.2</span>51.2<span style="font-size:0.75em;color:#888;"> ±20.6</span>",
      • "44.0<span style="font-size:0.75em;color:#888;"> ±16.0</span>",
      • "28.0<span style="font-size:0.75em;color:#888;"> ±2.8</span>",
      • "81.5<span style="font-size:0.75em;color:#888;"> ±56.5</span>",
      • 1.39
      ],
    • [
      • "4",
      • "<a target="_blank" href="https://huggingface.co/z-ai/glm-4.7" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">z-ai/glm-4.7</a>",
      • "<span style="display:none">000066.3</span>66.3<span style="font-size:0.75em;color:#888;"> ±40.8</span>",
      • "138.5<span style="font-size:0.75em;color:#888;"> ±111.5</span>",
      • "58.5<span style="font-size:0.75em;color:#888;"> ±22.5</span>",
      • "2.0<span style="font-size:0.75em;color:#888;"> ±1.8</span>",
      • 0.07
      ],
    • [
      • "5",
      • "<a target="_blank" href="https://x.ai/" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">x-ai/grok-4-fast</a>",
      • "<span style="display:none">000070.2</span>70.2<span style="font-size:0.75em;color:#888;"> ±15.0</span>",
      • "55.5<span style="font-size:0.75em;color:#888;"> ±18.0</span>",
      • "153.5<span style="font-size:0.75em;color:#888;"> ±39.8</span>",
      • "1.5<span style="font-size:0.75em;color:#888;"> ±2.5</span>",
      • 0.02
      ],
    • [
      • "6",
      • "<a target="_blank" href="https://platform.openai.com/docs/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">openai/gpt-4.1-mini</a>",
      • "<span style="display:none">000075.7</span>75.7<span style="font-size:0.75em;color:#888;"> ±15.3</span>",
      • "30.0<span style="font-size:0.75em;color:#888;"> ±5.5</span>",
      • "42.0<span style="font-size:0.75em;color:#888;"> ±5.8</span>",
      • "155.0<span style="font-size:0.75em;color:#888;"> ±43.5</span>",
      • 0.06
      ],
    • [
      • "7",
      • "<a target="_blank" href="https://platform.openai.com/docs/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">openai/o3</a>",
      • "<span style="display:none">000084.3</span>84.3<span style="font-size:0.75em;color:#888;"> ±14.7</span>",
      • "96.5<span style="font-size:0.75em;color:#888;"> ±25.0</span>",
      • "47.0<span style="font-size:0.75em;color:#888;"> ±7.8</span>",
      • "109.5<span style="font-size:0.75em;color:#888;"> ±33.0</span>",
      • 0.29
      ],
    • [
      • "8",
      • "<a target="_blank" href="https://platform.openai.com/docs/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">openai/gpt-5.1-codex</a>",
      • "<span style="display:none">000085.3</span>85.3<span style="font-size:0.75em;color:#888;"> ±7.4</span>",
      • "61.5<span style="font-size:0.75em;color:#888;"> ±14.2</span>",
      • "41.0<span style="font-size:0.75em;color:#888;"> ±4.5</span>",
      • "153.5<span style="font-size:0.75em;color:#888;"> ±14.0</span>",
      • 0.27
      ],
    • [
      • "9",
      • "<a target="_blank" href="https://platform.openai.com/docs/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">openai/gpt-5.2</a>",
      • "<span style="display:none">000086.8</span>86.8<span style="font-size:0.75em;color:#888;"> ±16.7</span>",
      • "190.0<span style="font-size:0.75em;color:#888;"> ±33.5</span>",
      • "49.5<span style="font-size:0.75em;color:#888;"> ±11.0</span>",
      • "21.0<span style="font-size:0.75em;color:#888;"> ±31.0</span>",
      • 0.55
      ],
    • [
      • "10",
      • "<a target="_blank" href="https://platform.openai.com/docs/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">openai/gpt-5.2 (reasoning)</a>",
      • "<span style="display:none">000089.8</span>89.8<span style="font-size:0.75em;color:#888;"> ±22.0</span>",
      • "196.5<span style="font-size:0.75em;color:#888;"> ±49.0</span>",
      • "45.5<span style="font-size:0.75em;color:#888;"> ±9.8</span>",
      • "27.5<span style="font-size:0.75em;color:#888;"> ±34.8</span>",
      • 0.52
      ],
    • [
      • "11",
      • "<a target="_blank" href="https://docs.anthropic.com/en/docs/models-overview" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">anthropic/claude-opus-4.5</a>",
      • "<span style="display:none">000097.0</span>97.0<span style="font-size:0.75em;color:#888;"> ±28.9</span>",
      • "217.5<span style="font-size:0.75em;color:#888;"> ±88.8</span>",
      • "50.5<span style="font-size:0.75em;color:#888;"> ±18.5</span>",
      • "23.0<span style="font-size:0.75em;color:#888;"> ±5.5</span>",
      • 1.52
      ],
    • [
      • "12",
      • "<a target="_blank" href="https://x.ai/" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">x-ai/grok-4.1-fast</a>",
      • "<span style="display:none">000098.0</span>98.0<span style="font-size:0.75em;color:#888;"> ±11.0</span>",
      • "27.0<span style="font-size:0.75em;color:#888;"> ±11.0</span>",
      • "263.0<span style="font-size:0.75em;color:#888;"> ±28.5</span>",
      • "4.0<span style="font-size:0.75em;color:#888;"> ±4.2</span>",
      • 0.03
      ],
    • [
      • "13",
      • "<a target="_blank" href="https://docs.anthropic.com/en/docs/models-overview" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">anthropic/claude-opus-4.5 (reasoning)</a>",
      • "<span style="display:none">000099.2</span>99.2<span style="font-size:0.75em;color:#888;"> ±29.3</span>",
      • "224.5<span style="font-size:0.75em;color:#888;"> ±90.3</span>",
      • "50.0<span style="font-size:0.75em;color:#888;"> ±18.5</span>",
      • "23.0<span style="font-size:0.75em;color:#888;"> ±5.5</span>",
      • 1.44
      ],
    • [
      • "14",
      • "<a target="_blank" href="https://platform.openai.com/docs/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">openai/gpt-5</a>",
      • "<span style="display:none">000100.7</span>100.7<span style="font-size:0.75em;color:#888;"> ±22.8</span>",
      • "272.5<span style="font-size:0.75em;color:#888;"> ±70.0</span>",
      • "8.5<span style="font-size:0.75em;color:#888;"> ±16.0</span>",
      • "21.0<span style="font-size:0.75em;color:#888;"> ±14.5</span>",
      • 0.33
      ],
    • [
      • "15",
      • "<a target="_blank" href="https://platform.openai.com/docs/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">openai/gpt-4.1-nano</a>",
      • "<span style="display:none">000105.2</span>105.2<span style="font-size:0.75em;color:#888;"> ±10.7</span>",
      • "54.5<span style="font-size:0.75em;color:#888;"> ±16.0</span>",
      • "41.5<span style="font-size:0.75em;color:#888;"> ±4.2</span>",
      • "219.5<span style="font-size:0.75em;color:#888;"> ±23.0</span>",
      • 0.02
      ],
    • [
      • "16",
      • "<a target="_blank" href="https://platform.openai.com/docs/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">openai/gpt-5.1</a>",
      • "<span style="display:none">000111.8</span>111.8<span style="font-size:0.75em;color:#888;"> ±22.6</span>",
      • "214.0<span style="font-size:0.75em;color:#888;"> ±58.0</span>",
      • "44.0<span style="font-size:0.75em;color:#888;"> ±12.8</span>",
      • "77.5<span style="font-size:0.75em;color:#888;"> ±43.3</span>",
      • 0.42
      ],
    • [
      • "17",
      • "<a target="_blank" href="https://platform.openai.com/docs/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">openai/o4-mini</a>",
      • "<span style="display:none">000115.5</span>115.5<span style="font-size:0.75em;color:#888;"> ±27.0</span>",
      • "88.0<span style="font-size:0.75em;color:#888;"> ±79.5</span>",
      • "80.0<span style="font-size:0.75em;color:#888;"> ±17.5</span>",
      • "178.5<span style="font-size:0.75em;color:#888;"> ±35.8</span>",
      • 0.27
      ],
    • [
      • "18",
      • "<a target="_blank" href="https://platform.openai.com/docs/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">openai/gpt-5.1 (reasoning)</a>",
      • "<span style="display:none">000120.8</span>120.8<span style="font-size:0.75em;color:#888;"> ±22.3</span>",
      • "223.0<span style="font-size:0.75em;color:#888;"> ±48.0</span>",
      • "46.0<span style="font-size:0.75em;color:#888;"> ±12.5</span>",
      • "93.5<span style="font-size:0.75em;color:#888;"> ±44.8</span>",
      • 0.43
      ],
    • [
      • "19",
      • "<a target="_blank" href="https://x.ai/" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">x-ai/grok-4</a>",
      • "<span style="display:none">000126.3</span>126.3<span style="font-size:0.75em;color:#888;"> ±19.8</span>",
      • "129.0<span style="font-size:0.75em;color:#888;"> ±9.8</span>",
      • "248.0<span style="font-size:0.75em;color:#888;"> ±54.3</span>",
      • "2.0<span style="font-size:0.75em;color:#888;"> ±6.5</span>",
      • 1.02
      ],
    • [
      • "20",
      • "<a target="_blank" href="https://platform.openai.com/docs/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">openai/o3-mini</a>",
      • "<span style="display:none">000153.2</span>153.2<span style="font-size:0.75em;color:#888;"> ±15.6</span>",
      • "142.0<span style="font-size:0.75em;color:#888;"> ±27.0</span>",
      • "160.5<span style="font-size:0.75em;color:#888;"> ±33.5</span>",
      • "157.0<span style="font-size:0.75em;color:#888;"> ±18.2</span>",
      • 0.17
      ],
    • [
      • "21",
      • "<a target="_blank" href="https://platform.openai.com/docs/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">openai/gpt-4o</a>",
      • "<span style="display:none">000167.2</span>167.2<span style="font-size:0.75em;color:#888;"> ±16.6</span>",
      • "57.5<span style="font-size:0.75em;color:#888;"> ±23.0</span>",
      • "48.5<span style="font-size:0.75em;color:#888;"> ±21.0</span>",
      • "395.5<span style="font-size:0.75em;color:#888;"> ±30.8</span>",
      • 0.56
      ],
    • [
      • "22",
      • "<a target="_blank" href="https://platform.openai.com/docs/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">openai/gpt-4.1</a>",
      • "<span style="display:none">000168.0</span>168.0<span style="font-size:0.75em;color:#888;"> ±19.6</span>",
      • "92.5<span style="font-size:0.75em;color:#888;"> ±27.8</span>",
      • "83.5<span style="font-size:0.75em;color:#888;"> ±39.0</span>",
      • "328.0<span style="font-size:0.75em;color:#888;"> ±41.0</span>",
      • 0.54
      ],
    • [
      • "23",
      • "<a target="_blank" href="https://platform.openai.com/docs/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">openai/gpt-5-nano</a>",
      • "<span style="display:none">000175.0</span>175.0<span style="font-size:0.75em;color:#888;"> ±24.5</span>",
      • "353.0<span style="font-size:0.75em;color:#888;"> ±56.0</span>",
      • "82.5<span style="font-size:0.75em;color:#888;"> ±20.8</span>",
      • "89.5<span style="font-size:0.75em;color:#888;"> ±48.0</span>",
      • 0.02
      ],
    • [
      • "24",
      • "<a target="_blank" href="https://platform.openai.com/docs/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">openai/gpt-4o-mini</a>",
      • "<span style="display:none">000183.2</span>183.2<span style="font-size:0.75em;color:#888;"> ±25.8</span>",
      • "72.5<span style="font-size:0.75em;color:#888;"> ±35.8</span>",
      • "104.5<span style="font-size:0.75em;color:#888;"> ±48.0</span>",
      • "372.5<span style="font-size:0.75em;color:#888;"> ±53.5</span>",
      • 0.04
      ],
    • [
      • "25",
      • "<a target="_blank" href="https://ai.google.dev/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemini-2.5-flash (reasoning)</a>",
      • "<span style="display:none">000195.2</span>195.2<span style="font-size:0.75em;color:#888;"> ±76.7</span>",
      • "337.5<span style="font-size:0.75em;color:#888;"> ±143.2</span>",
      • "68.5<span style="font-size:0.75em;color:#888;"> ±24.5</span>",
      • "179.5<span style="font-size:0.75em;color:#888;"> ±150.8</span>",
      • 0.28
      ],
    • [
      • "26",
      • "<a target="_blank" href="https://docs.anthropic.com/en/docs/models-overview" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">anthropic/claude-3.5-sonnet</a>",
      • "<span style="display:none">000199.7</span>199.7<span style="font-size:0.75em;color:#888;"> ±24.5</span>",
      • "132.5<span style="font-size:0.75em;color:#888;"> ±14.5</span>",
      • "180.5<span style="font-size:0.75em;color:#888;"> ±38.5</span>",
      • "286.0<span style="font-size:0.75em;color:#888;"> ±53.8</span>",
      • 2.53
      ],
    • [
      • "27",
      • "<a target="_blank" href="https://platform.openai.com/docs/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">openai/gpt-oss-20b</a>",
      • "<span style="display:none">000199.7</span>199.7<span style="font-size:0.75em;color:#888;"> ±40.0</span>",
      • "110.5<span style="font-size:0.75em;color:#888;"> ±14.8</span>",
      • "67.0<span style="font-size:0.75em;color:#888;"> ±47.0</span>",
      • "421.5<span style="font-size:0.75em;color:#888;"> ±111.5</span>",
      • 0.02
      ],
    • [
      • "28",
      • "<a target="_blank" href="https://ai.google.dev/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemini-2.5-flash</a>",
      • "<span style="display:none">000210.5</span>210.5<span style="font-size:0.75em;color:#888;"> ±84.2</span>",
      • "482.5<span style="font-size:0.75em;color:#888;"> ±92.2</span>",
      • "42.0<span style="font-size:0.75em;color:#888;"> ±4.8</span>",
      • "107.0<span style="font-size:0.75em;color:#888;"> ±179.5</span>",
      • 0.31
      ],
    • [
      • "29",
      • "<a target="_blank" href="https://www.deepseek.com/" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">deepseek/deepseek-v3.2 (reasoning)</a>",
      • "<span style="display:none">000215.0</span>215.0<span style="font-size:0.75em;color:#888;"> ±82.1</span>",
      • "123.5<span style="font-size:0.75em;color:#888;"> ±58.0</span>",
      • "314.0<span style="font-size:0.75em;color:#888;"> ±85.0</span>",
      • "207.5<span style="font-size:0.75em;color:#888;"> ±215.0</span>",
      • 0.03
      ],
    • [
      • "30",
      • "<a target="_blank" href="https://ai.google.dev/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemini-3-pro-preview</a>",
      • "<span style="display:none">000226.0</span>226.0<span style="font-size:0.75em;color:#888;"> ±63.1</span>",
      • "334.5<span style="font-size:0.75em;color:#888;"> ±175.8</span>",
      • "86.5<span style="font-size:0.75em;color:#888;"> ±32.8</span>",
      • "257.0<span style="font-size:0.75em;color:#888;"> ±43.0</span>",
      • 1.2
      ],
    • [
      • "31",
      • "<a target="_blank" href="https://ai.google.dev/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemini-2.0-flash-lite-001</a>",
      • "<span style="display:none">000227.7</span>227.7<span style="font-size:0.75em;color:#888;"> ±95.2</span>",
      • "138.0<span style="font-size:0.75em;color:#888;"> ±183.5</span>",
      • "42.0<span style="font-size:0.75em;color:#888;"> ±4.0</span>",
      • "503.0<span style="font-size:0.75em;color:#888;"> ±188.8</span>",
      • 0.04
      ],
    • [
      • "32",
      • "<a target="_blank" href="https://platform.openai.com/docs/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">openai/gpt-5-mini</a>",
      • "<span style="display:none">000250.5</span>250.5<span style="font-size:0.75em;color:#888;"> ±28.5</span>",
      • "412.0<span style="font-size:0.75em;color:#888;"> ±68.0</span>",
      • "105.0<span style="font-size:0.75em;color:#888;"> ±17.0</span>",
      • "234.5<span style="font-size:0.75em;color:#888;"> ±39.0</span>",
      • 0.15
      ],
    • [
      • "33",
      • "<a target="_blank" href="https://platform.openai.com/docs/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">openai/gpt-4-turbo</a>",
      • "<span style="display:none">000252.5</span>252.5<span style="font-size:0.75em;color:#888;"> ±36.8</span>",
      • "78.5<span style="font-size:0.75em;color:#888;"> ±45.5</span>",
      • "63.0<span style="font-size:0.75em;color:#888;"> ±45.5</span>",
      • "616.0<span style="font-size:0.75em;color:#888;"> ±93.2</span>",
      • 2.66
      ],
    • [
      • "34",
      • "<a target="_blank" href="https://huggingface.co/Qwen" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">qwen/qwen3-235b-a22b-2507 (reasoning)</a>",
      • "<span style="display:none">000260.3</span>260.3<span style="font-size:0.75em;color:#888;"> ±38.8</span>",
      • "130.0<span style="font-size:0.75em;color:#888;"> ±18.5</span>",
      • "124.0<span style="font-size:0.75em;color:#888;"> ±64.0</span>",
      • "527.0<span style="font-size:0.75em;color:#888;"> ±76.0</span>",
      • 0.04
      ],
    • [
      • "35",
      • "<a target="_blank" href="https://huggingface.co/Qwen" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">qwen/qwen3-235b-a22b-2507</a>",
      • "<span style="display:none">000264.0</span>264.0<span style="font-size:0.75em;color:#888;"> ±37.0</span>",
      • "138.0<span style="font-size:0.75em;color:#888;"> ±20.0</span>",
      • "136.5<span style="font-size:0.75em;color:#888;"> ±62.5</span>",
      • "517.5<span style="font-size:0.75em;color:#888;"> ±74.1</span>",
      • 0.04
      ],
    • [
      • "36",
      • "<a target="_blank" href="https://huggingface.co/Qwen" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">qwen/qwen-plus</a>",
      • "<span style="display:none">000267.3</span>267.3<span style="font-size:0.75em;color:#888;"> ±35.5</span>",
      • "124.0<span style="font-size:0.75em;color:#888;"> ±18.0</span>",
      • "119.5<span style="font-size:0.75em;color:#888;"> ±68.2</span>",
      • "558.5<span style="font-size:0.75em;color:#888;"> ±68.8</span>",
      • 0.11
      ],
    • [
      • "37",
      • "<a target="_blank" href="https://huggingface.co/allenai/olmo-2-0325-32b-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">allenai/olmo-2-0325-32b-instruct</a>",
      • "<span style="display:none">000268.7</span>268.7<span style="font-size:0.75em;color:#888;"> ±50.3</span>",
      • "620.5<span style="font-size:0.75em;color:#888;"> ±129.0</span>",
      • "164.5<span style="font-size:0.75em;color:#888;"> ±42.0</span>",
      • "21.0<span style="font-size:0.75em;color:#888;"> ±22.0</span>",
      • 0.02
      ],
    • [
      • "38",
      • "<a target="_blank" href="https://platform.openai.com/docs/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">openai/gpt-oss-120b</a>",
      • "<span style="display:none">000269.7</span>269.7<span style="font-size:0.75em;color:#888;"> ±69.6</span>",
      • "142.0<span style="font-size:0.75em;color:#888;"> ±64.1</span>",
      • "128.5<span style="font-size:0.75em;color:#888;"> ±56.1</span>",
      • "538.5<span style="font-size:0.75em;color:#888;"> ±214.0</span>",
      • 0.03
      ],
    • [
      • "39",
      • "<a target="_blank" href="https://docs.anthropic.com/en/docs/models-overview" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">anthropic/claude-sonnet-4.5 (reasoning)</a>",
      • "<span style="display:none">000278.7</span>278.7<span style="font-size:0.75em;color:#888;"> ±41.7</span>",
      • "289.5<span style="font-size:0.75em;color:#888;"> ±96.0</span>",
      • "131.5<span style="font-size:0.75em;color:#888;"> ±22.8</span>",
      • "415.0<span style="font-size:0.75em;color:#888;"> ±66.3</span>",
      • 1.65
      ],
    • [
      • "40",
      • "<a target="_blank" href="https://ai.google.dev/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemini-2.5-flash-lite (reasoning)</a>",
      • "<span style="display:none">000280.5</span>280.5<span style="font-size:0.75em;color:#888;"> ±56.8</span>",
      • "186.0<span style="font-size:0.75em;color:#888;"> ±125.5</span>",
      • "47.5<span style="font-size:0.75em;color:#888;"> ±16.8</span>",
      • "608.0<span style="font-size:0.75em;color:#888;"> ±122.5</span>",
      • 0.06
      ],
    • [
      • "41",
      • "<a target="_blank" href="https://huggingface.co/microsoft/phi-4-reasoning-plus" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/phi-4-reasoning-plus (reasoning)</a>",
      • "<span style="display:none">000281.0</span>281.0<span style="font-size:0.75em;color:#888;"> ±19.7</span>",
      • "207.0<span style="font-size:0.75em;color:#888;"> ±21.5</span>",
      • "184.5<span style="font-size:0.75em;color:#888;"> ±34.2</span>",
      • "451.5<span style="font-size:0.75em;color:#888;"> ±43.8</span>",
      • 0.03
      ],
    • [
      • "42",
      • "<a target="_blank" href="https://huggingface.co/microsoft/phi-4-reasoning-plus" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">microsoft/phi-4-reasoning-plus</a>",
      • "<span style="display:none">000281.2</span>281.2<span style="font-size:0.75em;color:#888;"> ±18.2</span>",
      • "210.0<span style="font-size:0.75em;color:#888;"> ±16.5</span>",
      • "187.5<span style="font-size:0.75em;color:#888;"> ±33.8</span>",
      • "446.0<span style="font-size:0.75em;color:#888;"> ±38.8</span>",
      • 0.03
      ],
    • [
      • "43",
      • "<a target="_blank" href="https://x.ai/" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">x-ai/grok-code-fast-1</a>",
      • "<span style="display:none">000284.5</span>284.5<span style="font-size:0.75em;color:#888;"> ±41.8</span>",
      • "137.5<span style="font-size:0.75em;color:#888;"> ±48.5</span>",
      • "474.5<span style="font-size:0.75em;color:#888;"> ±72.8</span>",
      • "241.5<span style="font-size:0.75em;color:#888;"> ±114.8</span>",
      • 0.14
      ],
    • [
      • "44",
      • "<a target="_blank" href="https://docs.anthropic.com/en/docs/models-overview" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">anthropic/claude-sonnet-4.5</a>",
      • "<span style="display:none">000285.0</span>285.0<span style="font-size:0.75em;color:#888;"> ±39.4</span>",
      • "283.5<span style="font-size:0.75em;color:#888;"> ±84.8</span>",
      • "130.0<span style="font-size:0.75em;color:#888;"> ±27.3</span>",
      • "441.5<span style="font-size:0.75em;color:#888;"> ±62.3</span>",
      • 1.63
      ],
    • [
      • "45",
      • "<a target="_blank" href="https://huggingface.co/minimax/minimax-m2" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">minimax/minimax-m2</a>",
      • "<span style="display:none">000297.3</span>297.3<span style="font-size:0.75em;color:#888;"> ±106.3</span>",
      • "138.5<span style="font-size:0.75em;color:#888;"> ±34.5</span>",
      • "385.5<span style="font-size:0.75em;color:#888;"> ±147.1</span>",
      • "368.0<span style="font-size:0.75em;color:#888;"> ±254.5</span>",
      • 0.1
      ],
    • [
      • "46",
      • "<a target="_blank" href="https://huggingface.co/meta-llama/llama-3.3-70b-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/llama-3.3-70b-instruct</a>",
      • "<span style="display:none">000312.8</span>312.8<span style="font-size:0.75em;color:#888;"> ±45.0</span>",
      • "164.0<span style="font-size:0.75em;color:#888;"> ±33.2</span>",
      • "125.5<span style="font-size:0.75em;color:#888;"> ±73.8</span>",
      • "649.0<span style="font-size:0.75em;color:#888;"> ±102.2</span>",
      • 0.03
      ],
    • [
      • "47",
      • "<a target="_blank" href="https://huggingface.co/meta-llama/llama-3.1-8b-instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/llama-3.1-8b-instruct</a>",
      • "<span style="display:none">000315.7</span>315.7<span style="font-size:0.75em;color:#888;"> ±45.2</span>",
      • "135.0<span style="font-size:0.75em;color:#888;"> ±19.2</span>",
      • "96.5<span style="font-size:0.75em;color:#888;"> ±56.0</span>",
      • "715.5<span style="font-size:0.75em;color:#888;"> ±116.0</span>",
      • 0
      ],
    • [
      • "48",
      • "<a target="_blank" href="https://huggingface.co/Qwen" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">qwen/qwen3-coder (reasoning)</a>",
      • "<span style="display:none">000325.5</span>325.5<span style="font-size:0.75em;color:#888;"> ±30.3</span>",
      • "197.0<span style="font-size:0.75em;color:#888;"> ±35.3</span>",
      • "214.5<span style="font-size:0.75em;color:#888;"> ±41.8</span>",
      • "565.0<span style="font-size:0.75em;color:#888;"> ±76.5</span>",
      • 0.1
      ],
    • [
      • "49",
      • "<a target="_blank" href="https://huggingface.co/Qwen" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">qwen/qwen3-coder</a>",
      • "<span style="display:none">000326.3</span>326.3<span style="font-size:0.75em;color:#888;"> ±22.3</span>",
      • "194.0<span style="font-size:0.75em;color:#888;"> ±25.0</span>",
      • "202.0<span style="font-size:0.75em;color:#888;"> ±39.1</span>",
      • "583.0<span style="font-size:0.75em;color:#888;"> ±45.5</span>",
      • 0.1
      ],
    • [
      • "50",
      • "<a target="_blank" href="https://ai.google.dev/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemini-2.0-flash-001</a>",
      • "<span style="display:none">000326.5</span>326.5<span style="font-size:0.75em;color:#888;"> ±87.0</span>",
      • "273.0<span style="font-size:0.75em;color:#888;"> ±141.5</span>",
      • "40.0<span style="font-size:0.75em;color:#888;"> ±4.0</span>",
      • "666.5<span style="font-size:0.75em;color:#888;"> ±204.9</span>",
      • 0.06
      ],
    • [
      • "51",
      • "<a target="_blank" href="https://docs.anthropic.com/en/docs/models-overview" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">anthropic/claude-haiku-4.5</a>",
      • "<span style="display:none">000333.2</span>333.2<span style="font-size:0.75em;color:#888;"> ±26.7</span>",
      • "231.0<span style="font-size:0.75em;color:#888;"> ±54.2</span>",
      • "125.0<span style="font-size:0.75em;color:#888;"> ±29.0</span>",
      • "643.5<span style="font-size:0.75em;color:#888;"> ±43.0</span>",
      • 0.64
      ],
    • [
      • "52",
      • "<a target="_blank" href="https://docs.anthropic.com/en/docs/models-overview" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">anthropic/claude-haiku-4.5 (reasoning)</a>",
      • "<span style="display:none">000335.2</span>335.2<span style="font-size:0.75em;color:#888;"> ±26.8</span>",
      • "237.0<span style="font-size:0.75em;color:#888;"> ±54.8</span>",
      • "125.0<span style="font-size:0.75em;color:#888;"> ±32.8</span>",
      • "643.5<span style="font-size:0.75em;color:#888;"> ±40.0</span>",
      • 0.65
      ],
    • [
      • "53",
      • "<a target="_blank" href="https://ai.google.dev/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemini-3-flash-preview</a>",
      • "<span style="display:none">000339.3</span>339.3<span style="font-size:0.75em;color:#888;"> ±50.4</span>",
      • "426.5<span style="font-size:0.75em;color:#888;"> ±123.5</span>",
      • "120.5<span style="font-size:0.75em;color:#888;"> ±49.8</span>",
      • "471.0<span style="font-size:0.75em;color:#888;"> ±58.0</span>",
      • 0.37
      ],
    • [
      • "54",
      • "<a target="_blank" href="https://ai.google.dev/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemini-3-flash-preview (reasoning)</a>",
      • "<span style="display:none">000340.8</span>340.8<span style="font-size:0.75em;color:#888;"> ±66.4</span>",
      • "395.0<span style="font-size:0.75em;color:#888;"> ±175.5</span>",
      • "117.5<span style="font-size:0.75em;color:#888;"> ±60.3</span>",
      • "510.0<span style="font-size:0.75em;color:#888;"> ±53.8</span>",
      • 0.42
      ],
    • [
      • "55",
      • "<a target="_blank" href="https://huggingface.co/meta-llama/llama-4-maverick" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/llama-4-maverick</a>",
      • "<span style="display:none">000342.8</span>342.8<span style="font-size:0.75em;color:#888;"> ±64.6</span>",
      • "139.0<span style="font-size:0.75em;color:#888;"> ±33.0</span>",
      • "232.0<span style="font-size:0.75em;color:#888;"> ±55.8</span>",
      • "657.5<span style="font-size:0.75em;color:#888;"> ±173.5</span>",
      • 0.07
      ],
    • [
      • "56",
      • "<a target="_blank" href="https://www.deepseek.com/" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">deepseek/deepseek-chat</a>",
      • "<span style="display:none">000345.7</span>345.7<span style="font-size:0.75em;color:#888;"> ±22.3</span>",
      • "142.0<span style="font-size:0.75em;color:#888;"> ±28.5</span>",
      • "329.5<span style="font-size:0.75em;color:#888;"> ±45.5</span>",
      • "565.5<span style="font-size:0.75em;color:#888;"> ±41.0</span>",
      • 0.15
      ],
    • [
      • "57",
      • "<a target="_blank" href="https://huggingface.co/meta-llama/llama-4-scout" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/llama-4-scout</a>",
      • "<span style="display:none">000350.5</span>350.5<span style="font-size:0.75em;color:#888;"> ±39.3</span>",
      • "138.5<span style="font-size:0.75em;color:#888;"> ±16.0</span>",
      • "177.5<span style="font-size:0.75em;color:#888;"> ±73.0</span>",
      • "735.5<span style="font-size:0.75em;color:#888;"> ±85.5</span>",
      • 0.04
      ],
    • [
      • "58",
      • "<a target="_blank" href="https://docs.mistral.ai/getting-started/models/" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mistralai/mistral-medium-3</a>",
      • "<span style="display:none">000361.0</span>361.0<span style="font-size:0.75em;color:#888;"> ±34.0</span>",
      • "138.0<span style="font-size:0.75em;color:#888;"> ±51.0</span>",
      • "376.5<span style="font-size:0.75em;color:#888;"> ±59.5</span>",
      • "568.5<span style="font-size:0.75em;color:#888;"> ±53.2</span>",
      • 0.28
      ],
    • [
      • "59",
      • "<a target="_blank" href="https://docs.mistral.ai/getting-started/models/" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mistralai/mistral-small-3.1-24b-instruct</a>",
      • "<span style="display:none">000389.2</span>389.2<span style="font-size:0.75em;color:#888;"> ±37.0</span>",
      • "190.0<span style="font-size:0.75em;color:#888;"> ±37.0</span>",
      • "215.0<span style="font-size:0.75em;color:#888;"> ±37.0</span>",
      • "762.5<span style="font-size:0.75em;color:#888;"> ±83.6</span>",
      • 0.01
      ],
    • [
      • "60",
      • "<a target="_blank" href="https://docs.anthropic.com/en/docs/models-overview" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">anthropic/claude-3.5-haiku</a>",
      • "<span style="display:none">000401.2</span>401.2<span style="font-size:0.75em;color:#888;"> ±25.4</span>",
      • "168.0<span style="font-size:0.75em;color:#888;"> ±30.5</span>",
      • "280.5<span style="font-size:0.75em;color:#888;"> ±15.5</span>",
      • "755.0<span style="font-size:0.75em;color:#888;"> ±61.5</span>",
      • 0.59
      ],
    • [
      • "61",
      • "<a target="_blank" href="https://huggingface.co/z-ai/glm-4.6v" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">z-ai/glm-4.6v</a>",
      • "<span style="display:none">000410.5</span>410.5<span style="font-size:0.75em;color:#888;"> ±104.0</span>",
      • "196.0<span style="font-size:0.75em;color:#888;"> ±55.3</span>",
      • "143.0<span style="font-size:0.75em;color:#888;"> ±70.3</span>",
      • "892.5<span style="font-size:0.75em;color:#888;"> ±310.3</span>",
      • 0.14
      ],
    • [
      • "62",
      • "<a target="_blank" href="https://www.deepseek.com/" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">deepseek/deepseek-v3.2</a>",
      • "<span style="display:none">000426.2</span>426.2<span style="font-size:0.75em;color:#888;"> ±61.5</span>",
      • "234.5<span style="font-size:0.75em;color:#888;"> ±128.3</span>",
      • "303.0<span style="font-size:0.75em;color:#888;"> ±109.0</span>",
      • "741.0<span style="font-size:0.75em;color:#888;"> ±62.2</span>",
      • 0.06
      ],
    • [
      • "63",
      • "<a target="_blank" href="https://docs.mistral.ai/getting-started/models/" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mistralai/mistral-large-2512</a>",
      • "<span style="display:none">000444.0</span>444.0<span style="font-size:0.75em;color:#888;"> ±60.3</span>",
      • "405.0<span style="font-size:0.75em;color:#888;"> ±126.0</span>",
      • "370.5<span style="font-size:0.75em;color:#888;"> ±77.0</span>",
      • "556.5<span style="font-size:0.75em;color:#888;"> ±75.8</span>",
      • 0.28
      ],
    • [
      • "64",
      • "<a target="_blank" href="https://docs.mistral.ai/getting-started/models/" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">mistralai/devstral-2512</a>",
      • "<span style="display:none">000449.0</span>449.0<span style="font-size:0.75em;color:#888;"> ±53.3</span>",
      • "341.0<span style="font-size:0.75em;color:#888;"> ±113.5</span>",
      • "447.0<span style="font-size:0.75em;color:#888;"> ±74.5</span>",
      • "559.0<span style="font-size:0.75em;color:#888;"> ±60.5</span>",
      • 0.03
      ],
    • [
      • "65",
      • "<a target="_blank" href="https://ai.google.dev/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemini-2.5-flash-lite</a>",
      • "<span style="display:none">000476.2</span>476.2<span style="font-size:0.75em;color:#888;"> ±73.5</span>",
      • "450.0<span style="font-size:0.75em;color:#888;"> ±121.5</span>",
      • "42.5<span style="font-size:0.75em;color:#888;"> ±4.5</span>",
      • "936.0<span style="font-size:0.75em;color:#888;"> ±174.5</span>",
      • 0.07
      ],
    • [
      • "66",
      • "<a target="_blank" href="https://huggingface.co/Qwen" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">qwen/qwen3-vl-8b-instruct</a>",
      • "<span style="display:none">000518.0</span>518.0<span style="font-size:0.75em;color:#888;"> ±38.6</span>",
      • "281.0<span style="font-size:0.75em;color:#888;"> ±48.8</span>",
      • "564.5<span style="font-size:0.75em;color:#888;"> ±54.3</span>",
      • "708.5<span style="font-size:0.75em;color:#888;"> ±93.5</span>",
      • 0.1
      ],
    • [
      • "67",
      • "<a target="_blank" href="https://huggingface.co/allenai/olmo-3.1-32b-think:free" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">allenai/olmo-3.1-32b-think:free</a>",
      • "<span style="display:none">000530.5</span>530.5<span style="font-size:0.75em;color:#888;"> ±48.2</span>",
      • "172.0<span style="font-size:0.75em;color:#888;"> ±42.3</span>",
      • "596.0<span style="font-size:0.75em;color:#888;"> ±105.8</span>",
      • "823.5<span style="font-size:0.75em;color:#888;"> ±91.0</span>",
      • 0
      ],
    • [
      • "68",
      • "<a target="_blank" href="https://ai.google.dev/models" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemini-2.5-pro</a>",
      • "<span style="display:none">000539.8</span>539.8<span style="font-size:0.75em;color:#888;"> ±98.2</span>",
      • "391.0<span style="font-size:0.75em;color:#888;"> ±139.5</span>",
      • "195.5<span style="font-size:0.75em;color:#888;"> ±73.0</span>",
      • "1033.0<span style="font-size:0.75em;color:#888;"> ±230.5</span>",
      • 2.71
      ],
    • [
      • "69",
      • "<a target="_blank" href="https://huggingface.co/Qwen" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">qwen/qwen3-14b</a>",
      • "<span style="display:none">000558.0</span>558.0<span style="font-size:0.75em;color:#888;"> ±32.0</span>",
      • "134.0<span style="font-size:0.75em;color:#888;"> ±18.2</span>",
      • "536.0<span style="font-size:0.75em;color:#888;"> ±87.3</span>",
      • "1004.0<span style="font-size:0.75em;color:#888;"> ±46.0</span>",
      • 0.04
      ],
    • [
      • "70",
      • "<a target="_blank" href="https://huggingface.co/Qwen" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">qwen/qwen3-vl-235b-a22b-thinking</a>",
      • "<span style="display:none">000602.8</span>602.8<span style="font-size:0.75em;color:#888;"> ±59.9</span>",
      • "214.5<span style="font-size:0.75em;color:#888;"> ±79.0</span>",
      • "830.5<span style="font-size:0.75em;color:#888;"> ±110.0</span>",
      • "763.5<span style="font-size:0.75em;color:#888;"> ±106.5</span>",
      • 0.25
      ],
    • [
      • "71",
      • "<a target="_blank" href="https://huggingface.co/Qwen" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">qwen/qwen3-32b</a>",
      • "<span style="display:none">000725.3</span>725.3<span style="font-size:0.75em;color:#888;"> ±47.8</span>",
      • "205.0<span style="font-size:0.75em;color:#888;"> ±47.0</span>",
      • "805.0<span style="font-size:0.75em;color:#888;"> ±95.8</span>",
      • "1166.0<span style="font-size:0.75em;color:#888;"> ±92.2</span>",
      • 0.05
      ],
    • [
      • "72",
      • "<a target="_blank" href="https://huggingface.co/amazon/nova-2-lite-v1" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">amazon/nova-2-lite-v1 (reasoning)</a>",
      • "<span style="display:none">000788.3</span>788.3<span style="font-size:0.75em;color:#888;"> ±123.8</span>",
      • "1155.5<span style="font-size:0.75em;color:#888;"> ±319.5</span>",
      • "228.5<span style="font-size:0.75em;color:#888;"> ±74.0</span>",
      • "981.0<span style="font-size:0.75em;color:#888;"> ±182.0</span>",
      • 1.96
      ],
    • [
      • "73",
      • "<a target="_blank" href="https://huggingface.co/Qwen" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">qwen/qwen-plus (reasoning)</a>",
      • "<span style="display:none">000800.3</span>800.3<span style="font-size:0.75em;color:#888;"> ±78.9</span>",
      • "265.0<span style="font-size:0.75em;color:#888;"> ±70.0</span>",
      • "1125.5<span style="font-size:0.75em;color:#888;"> ±160.3</span>",
      • "1010.5<span style="font-size:0.75em;color:#888;"> ±129.0</span>",
      • 0.37
      ],
    • [
      • "74",
      • "<a target="_blank" href="https://huggingface.co/amazon/nova-2-lite-v1" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">amazon/nova-2-lite-v1</a>",
      • "<span style="display:none">000896.3</span>896.3<span style="font-size:0.75em;color:#888;"> ±91.3</span>",
      • "568.5<span style="font-size:0.75em;color:#888;"> ±121.8</span>",
      • "585.0<span style="font-size:0.75em;color:#888;"> ±120.0</span>",
      • "1535.5<span style="font-size:0.75em;color:#888;"> ±184.8</span>",
      • 0.81
      ],
    • [
      • "75",
      • "<a target="_blank" href="https://huggingface.co/z-ai/glm-4.6:exacto" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">z-ai/glm-4.6:exacto</a>",
      • "<span style="display:none">000930.7</span>930.7<span style="font-size:0.75em;color:#888;"> ±244.3</span>",
      • "1064.0<span style="font-size:0.75em;color:#888;"> ±438.0</span>",
      • "410.5<span style="font-size:0.75em;color:#888;"> ±123.5</span>",
      • "1317.5<span style="font-size:0.75em;color:#888;"> ±522.0</span>",
      • 0.56
      ],
    • [
      • "76",
      • "<a target="_blank" href="https://huggingface.co/z-ai/glm-4.5" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">z-ai/glm-4.5</a>",
      • "<span style="display:none">001427.0</span>1427.0<span style="font-size:0.75em;color:#888;"> ±487.2</span>",
      • "1199.5<span style="font-size:0.75em;color:#888;"> ±784.5</span>",
      • "985.5<span style="font-size:0.75em;color:#888;"> ±322.6</span>",
      • "2096.0<span style="font-size:0.75em;color:#888;"> ±822.5</span>",
      • 0.76
      ]
    ],
  • "metadata": null
}