加载中...
加载中...
| 3 | Gemini 3.0 Flash | 99.70 | 0.00 | 0.00 | 0.00 |
| 4 | GPT-5 | 99.60 | 0.00 | 0.00 | 0.00 |
| 5 | OpenAI o4 - mini | 99.50 | 98.70 | 0.00 | 0.00 |
| 6 | Gemini 2.5 Deep Think | 99.20 | 0.00 | 0.00 | 0.00 |
| 7 | Kimi K2 Thinking | 99.10 | 0.00 | 0.00 | 0.00 |
| 8 | Grok 4 | 98.80 | 0.00 | 0.00 | 0.00 |
| 9 | GPT OSS 20B | 98.70 | 96.00 | 0.00 | 0.00 |
| 10 | GLM-4.6 | 98.60 | 0.00 | 0.00 | 0.00 |
| 11 | GLM-4.6 | 98.60 | 0.00 | 0.00 | 0.00 |
| 12 | GPT OSS 120B | 97.90 | 96.60 | 0.00 | 0.00 |
| 13 | Step 3.5 Flash | 97.30 | 0.00 | 0.00 | 0.00 |
| 14 | GPT-5-Pro | 96.70 | 0.00 | 0.00 | 0.00 |
| 15 | Haiku 4.5 | 96.30 | 0.00 | 0.00 | 0.00 |
| 16 | Kimi K2.5 | 96.10 | 0.00 | 0.00 | 0.00 |
| 17 | DeepSeek V3.2 Speciale | 96.00 | 0.00 | 0.00 | 0.00 |
| 18 | GLM-4.7 | 95.70 | 0.00 | 0.00 | 0.00 |
| 19 | Gemini 3.0 Flash | 95.20 | 0.00 | 0.00 | 0.00 |
| 20 | Gemini 3.0 Pro (Preview 11-2025) | 95.00 | 0.00 | 0.00 | 0.00 |
| 21 | GPT-5 | 94.60 | 0.00 | 0.00 | 0.00 |
| 22 | Kimi K2 Thinking | 94.50 | 0.00 | 0.00 | 0.00 |
| 23 | GPT-5.1 | 94.00 | 0.00 | 0.00 | 0.00 |
| 24 | GPT-5.1 | 94.00 | 0.00 | 0.00 | 0.00 |
| 25 | DeepSeek V3.2 | 93.10 | 0.00 | 0.00 | 0.00 |
| 26 | o3-pro | 93.00 | 93.00 | 0.00 | 0.00 |
| 27 | OpenAI o4 - mini | 92.70 | 93.40 | 0.00 | 0.00 |
| 28 | Qwen3-235B-A22B-Thinking-2507 | 92.30 | 0.00 | 0.00 | 0.00 |
| 29 | Qwen3-235B-A22B-Thinking | 92.30 | 0.00 | 0.00 | 0.00 |
| 30 | Grok 4 Fast | 92.00 | 0.00 | 0.00 | 0.00 |
| 31 | Grok 4 | 91.70 | 0.00 | 0.00 | 0.00 |
| 32 | GLM-4.7-Flash | 91.60 | 0.00 | 0.00 | 0.00 |
| 33 | DeepSeek-V3.1 Terminus | 90.00 | 0.00 | 0.00 | 0.00 |
| 34 | DeepSeek V3.2-Exp | 89.30 | 0.00 | 0.00 | 0.00 |
| 35 | Grok 4.1 Fast | 89.00 | 0.00 | 0.00 | 0.00 |
| 36 | OpenAI o3 | 88.90 | 0.00 | 0.00 | 0.00 |
| 37 | DeepSeek-V3.1 | 88.40 | 93.10 | 0.00 | 0.00 |
| 38 | Gemini 2.5-Pro | 88.00 | 0.00 | 0.00 | 0.00 |
| 39 | DeepSeek-R1-0528 | 87.50 | 91.40 | 98.00 | 0.00 |
| 40 | Claude Sonnet 4.5 | 87.00 | 0.00 | 0.00 | 0.00 |
| 41 | Gemini 2.5 Pro Experimental 03-25 | 86.90 | 92.00 | 0.00 | 0.00 |
| 42 | OpenAI o3-mini | 86.50 | 60.00 | 95.80 | 0.00 |
| 43 | MiniMax M2.5 | 86.30 | 0.00 | 0.00 | 0.00 |
| 44 | Intern-S1 | 86.00 | 0.00 | 0.00 | 0.00 |
| 45 | Claude Sonnet 4 | 85.00 | 0.00 | 0.00 | 0.00 |
| 46 | GPT OSS 120B | 83.00 | 0.00 | 0.00 | 0.00 |
| 47 | Gemini-2.5-Pro-Preview-05-06 | 83.00 | 92.00 | 98.80 | 0.00 |
| 48 | Step3 | 82.90 | 0.00 | 0.00 | 0.00 |
| 49 | Qwen3-235B-A22B | 81.50 | 85.70 | 98.00 | 0.00 |
| 50 | Qwen3-4B-Thinking-2507 | 81.30 | 0.00 | 0.00 | 0.00 |
| 51 | M2.1 | 81.00 | 0.00 | 0.00 | 0.00 |
| 52 | Haiku 4.5 | 80.70 | 0.00 | 0.00 | 0.00 |
| 53 | Qwen3 Max (Preview) | 80.60 | 0.00 | 0.00 | 0.00 |
| 54 | GPT OSS 20B | 79.00 | 0.00 | 0.00 | 0.00 |
| 55 | Claude Opus 4.1 | 78.00 | 0.00 | 0.00 | 0.00 |
| 56 | Claude Opus 4.1 | 78.00 | 0.00 | 0.00 | 0.00 |
| 57 | MiniMax M2 | 78.00 | 0.00 | 0.00 | 0.00 |
| 58 | Grok 3 | 77.10 | 84.20 | 0.00 | 0.00 |
| 59 | MiniMax-M1-80k | 76.90 | 86.00 | 96.80 | 0.00 |
| 60 | Hunyuan-A13B-Instruct | 76.80 | 87.30 | 0.00 | 91.83 |
| 61 | Claude Opus 4 | 75.50 | 76.00 | 98.20 | 0.00 |
| 62 | Hunyuan-7B | 75.30 | 81.10 | 93.70 | 0.00 |
| 63 | Kimi K2 0905 | 75.20 | 0.00 | 0.00 | 0.00 |
| 64 | MiniMax-M1-40k | 74.60 | 83.30 | 96.00 | 0.00 |
| 65 | Qwen3-32B | 72.90 | 81.40 | 0.00 | 0.00 |
| 66 | Qwen3-32B | 72.90 | 81.40 | 97.20 | 0.00 |
| 67 | Gemini 2.5 Flash | 72.00 | 0.00 | 0.00 | 0.00 |
| 68 | Claude Sonnet 4 | 70.50 | 0.00 | 0.00 | 0.00 |
| 69 | Qwen3-235B-A22B-2507 | 70.30 | 0.00 | 0.00 | 0.00 |
| 70 | DeepSeek-R1 | 70.00 | 79.80 | 97.30 | 0.00 |
| 71 | Qwen3-Next | 69.50 | 0.00 | 0.00 | 90.30 |
| 72 | Pangu Pro MoE | 68.10 | 79.20 | 96.80 | 0.00 |
| 73 | Qwen3-8B | 67.30 | 76.00 | 97.40 | 0.00 |
| 74 | Magistral-Medium-2506 | 64.95 | 73.59 | 0.00 | 0.00 |
| 75 | Gemini 2.5 Flash-Lite | 63.10 | 0.00 | 0.00 | 0.00 |
| 76 | Magistral-Small-2506 | 62.76 | 70.68 | 0.00 | 0.00 |
| 77 | GPT-5 | 61.90 | 0.00 | 0.00 | 0.00 |
| 78 | Gemini 2.5 Flash | 61.60 | 88.00 | 0.00 | 0.00 |
| 79 | Qwen3-30B-A3B-2507 | 61.30 | 0.00 | 0.00 | 0.00 |
| 80 | DeepSeek V3.2-Exp | 58.00 | 0.00 | 0.00 | 0.00 |
| 81 | Claude Sonnet 3.7 | 54.80 | 23.30 | 82.20 | 0.00 |
| 82 | Kimi K2 | 54.00 | 69.60 | 97.40 | 0.00 |
| 83 | DeepSeek-V3.1 Terminus | 54.00 | 0.00 | 0.00 | 0.00 |
| 84 | DeepSeek-V3.1 | 49.80 | 66.30 | 0.00 | 0.00 |
| 85 | DeepSeek-V3-0324 | 47.70 | 59.40 | 94.00 | 96.30 |
| 86 | Qwen3-4B-2507 | 47.40 | 0.00 | 0.00 | 0.00 |
| 87 | GPT-5-mini | 47.00 | 0.00 | 0.00 | 0.00 |
| 88 | GPT-5-mini | 47.00 | 0.00 | 0.00 | 0.00 |
| 89 | GLM-4.6 | 44.00 | 0.00 | 0.00 | 0.00 |
| 90 | GPT-4o | 42.10 | 0.00 | 0.00 | 0.00 |
| 91 | Haiku 4.5 | 39.00 | 0.00 | 0.00 | 0.00 |
| 92 | Claude Sonnet 4 | 38.00 | 43.40 | 0.00 | 0.00 |
| 93 | Claude Sonnet 4.5 | 37.00 | 0.00 | 0.00 | 0.00 |
| 94 | GPT-4.1 | 36.70 | 48.10 | 92.80 | 95.90 |
| 95 | ERNIE-4.5-VL-424B-A47B-Base | 35.10 | 0.00 | 0.00 | 0.00 |
| 96 | ERNIE-4.5-300B-A47B | 35.10 | 54.80 | 96.40 | 96.60 |
| 97 | Gemini 2.0 Flash Experimental | 29.70 | 0.00 | 0.00 | 0.00 |
| 98 | GPT-4o(2025-03-27) | 26.70 | 0.00 | 0.00 | 0.00 |
| 99 | Qwen3-235B-A22B | 24.70 | 85.70 | 96.20 | 96.40 |
| 100 | Qwen3-30B-A3B | 21.60 | 0.00 | 0.00 | 0.00 |
| 101 | Qwen3-8B | 20.90 | 79.40 | 87.40 | 0.00 |
| 102 | Kimi K2 Thinking | 100.00 | 0.00 | 0.00 | 0.00 |
| 103 | Claude Sonnet 4.5 | 100.00 | 0.00 | 0.00 | 0.00 |
| 104 | GPT-5-Pro | 100.00 | 0.00 | 0.00 | 0.00 |
| 105 | Grok 4 Heavy | 100.00 | 0.00 | 0.00 | 0.00 |
| 106 | GPT-5.2 | 100.00 | 0.00 | 0.00 | 0.00 |
| 107 | Mistral-7B-Instruct-v0.3 | 0.00 | 0.00 | 0.00 | 36.20 |
| 108 | OpenAI o1 | 0.00 | 79.20 | 96.40 | 0.00 |
| 109 | Llama-3.2-3B | 0.00 | 0.00 | 0.00 | 34.00 |
| 110 | Gemini 1.5 Pro | 0.00 | 0.00 | 0.00 | 0.00 |
| 111 | Llama3.1-405B Instruct | 0.00 | 0.00 | 0.00 | 0.00 |
| 112 | Amazon Nova Pro | 0.00 | 0.00 | 0.00 | 0.00 |
| 113 | Gemini 2.5-Pro | 0.00 | 92.00 | 98.80 | 0.00 |
| 114 | GLM-4.5 | 0.00 | 91.00 | 98.20 | 0.00 |
| 115 | OpenAI o3 | 0.00 | 91.60 | 98.10 | 0.00 |
| 116 | GLM-4.5-Air | 0.00 | 89.40 | 98.10 | 0.00 |
| 117 | OpenAI o3-mini (high) | 0.00 | 87.00 | 97.90 | 0.00 |
| 118 | Pangu Embedded | 0.00 | 81.90 | 92.40 | 95.98 |
| 119 | Llama3.1-8B | 0.00 | 0.00 | 0.00 | 55.30 |
| 120 | Gemma 2 - 9B | 0.00 | 0.00 | 0.00 | 70.70 |
| 121 | Moonlight-16B-A3B-Instruct | 0.00 | 0.00 | 0.00 | 77.40 |
| 122 | Qwen2.5-3B | 0.00 | 0.00 | 0.00 | 79.10 |
| 123 | Llama3.1-8B-Instruct | 0.00 | 0.00 | 0.00 | 82.40 |
| 124 | Qwen2.5-7B | 0.00 | 0.00 | 0.00 | 85.40 |
| 125 | Phi-4-mini-instruct (3.8B) | 0.00 | 10.00 | 71.80 | 88.60 |
| 126 | GPT-4o mini | 0.00 | 0.00 | 0.00 | 91.30 |
| 127 | Qwen2.5-72B | 0.00 | 0.00 | 0.00 | 91.50 |
| 128 | Qwen2.5-Max | 0.00 | 0.00 | 0.00 | 94.50 |
| 129 | Claude3-Opus | 0.00 | 0.00 | 0.00 | 95.00 |
| 130 | Qwen2.5-32B | 0.00 | 0.00 | 0.00 | 95.90 |
| 131 | DeepSeek-V3 | 0.00 | 39.00 | 87.80 | 0.00 |
| 132 | Grok 3.5 | 0.00 | 0.00 | 0.00 | 0.00 |
| 133 | Gemma 3 - 27B (IT) | 0.00 | 25.30 | 0.00 | 0.00 |
| 134 | GPT-4.1 nano | 0.00 | 29.40 | 0.00 | 0.00 |
| 135 | Gemini 2.0 Pro Experimental | 0.00 | 36.00 | 0.00 | 0.00 |
| 136 | Grok 3 mini | 0.00 | 40.00 | 0.00 | 0.00 |
| 137 | GPT-4.1 mini | 0.00 | 49.60 | 0.00 | 0.00 |
| 138 | GLM-4-9B-Chat | 0.00 | 76.40 | 0.00 | 0.00 |
| 139 | Grok-3 - Reasoning Beta | 0.00 | 93.30 | 0.00 | 0.00 |
| 140 | Grok-3 mini - Reasoning | 0.00 | 96.00 | 0.00 | 0.00 |
| 141 | GPT-4o | 0.00 | 9.30 | 75.90 | 0.00 |
| 142 | Claude 3.5 Sonnet New | 0.00 | 16.00 | 78.00 | 0.00 |
| 143 | Kimi k1.5 (Long-CoT) | 0.00 | 0.00 | 96.20 | 0.00 |
| 144 | OpenAI o1-mini | 0.00 | 63.60 | 90.00 | 0.00 |
| 145 | Phi-4-instruct (reasoning-trained) | 0.00 | 50.00 | 90.40 | 0.00 |
| 146 | QwQ-32B-Preview | 0.00 | 50.00 | 90.60 | 0.00 |
| 147 | GPT-4.5 | 0.00 | 36.70 | 90.70 | 0.00 |
| 148 | QwQ-32B | 0.00 | 79.50 | 91.00 | 0.00 |
| 149 | DeepSeek-R1-Distill-Qwen-7B | 0.00 | 53.30 | 91.40 | 0.00 |
| 150 | DeepSeek-R1-Distill-Llama-70B | 0.00 | 0.00 | 94.50 | 0.00 |
| 151 | Kimi k1.5 (Short-CoT) | 0.00 | 0.00 | 94.60 | 0.00 |
| 152 | Llama 4 Behemoth Instruct | 0.00 | 0.00 | 95.00 | 0.00 |
| 153 | Hunyuan-T1 | 0.00 | 78.20 | 96.20 | 0.00 |
| 154 | Claude Sonnet 3.7-64K Extended Thinking | 0.00 | 80.00 | 96.20 | 0.00 |