加载中...
加载中...
| 80.84 |
| 76.00 |
| 95.00 |
| 3 | Claude Sonnet 4 | 80.20 | 66.00 | 0.00 |
| 4 | MiniMax M2.5 | 80.20 | 0.00 | 0.00 |
| 5 | Claude Opus 4.1 | 79.40 | 65.00 | 0.00 |
| 6 | GPT-5.1 | 76.30 | 0.00 | 0.00 |
| 7 | Qwen3-Max-Thinking | 75.30 | 85.90 | 0.00 |
| 8 | o3-pro | 75.00 | 0.00 | 0.00 |
| 9 | DeepSeek V3.2 | 73.10 | 83.30 | 0.00 |
| 10 | Claude Opus 4 | 72.50 | 56.60 | 0.00 |
| 11 | Qwen3.5-27B | 72.40 | 80.70 | 0.00 |
| 12 | Kimi K2 Thinking | 71.30 | 83.10 | 0.00 |
| 13 | OpenAI o3 | 69.10 | 75.80 | 0.00 |
| 14 | OpenAI o4 - mini | 68.10 | 0.00 | 0.00 |
| 15 | DeepSeek V3.2-Exp | 67.80 | 74.10 | 0.00 |
| 16 | Gemini 2.5-Pro | 67.20 | 77.10 | 0.00 |
| 17 | GLM-4.5 | 64.20 | 72.90 | 0.00 |
| 18 | Gemini 2.5 Pro Experimental 03-25 | 63.80 | 70.40 | 0.00 |
| 19 | Gemini-2.5-Pro-Preview-05-06 | 63.20 | 77.10 | 0.00 |
| 20 | GPT OSS 120B | 60.10 | 0.00 | 0.00 |
| 21 | GLM-4.7-Flash | 59.20 | 0.00 | 0.00 |
| 22 | Grok 4 | 58.60 | 82.00 | 0.00 |
| 23 | DeepSeek-R1-0528 | 57.60 | 73.30 | 0.00 |
| 24 | GLM-4.5-Air | 57.60 | 70.70 | 0.00 |
| 25 | MiniMax-M1-80k | 56.00 | 65.00 | 0.00 |
| 26 | MiniMax-M1-40k | 55.60 | 62.30 | 0.00 |
| 27 | Grok 4.1 | 54.60 | 0.00 | 0.00 |
| 28 | Gemini 2.5 Flash | 50.00 | 55.40 | 0.00 |
| 29 | OpenAI o3-mini (high) | 49.30 | 69.50 | 97.60 |
| 30 | DeepSeek-R1 | 49.20 | 65.90 | 0.00 |