File size: 1,036 Bytes
afb8d0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
{"model": "o1-preview-2024-09-12", "score": 95.9}
{"model": "o1-mini-2024-09-12", "score": 93.8}
{"model": "claude-3-5-sonnet-20241022", "score": 83.9}
{"model": "gemini-1.5-pro-exp-0827", "score": 79.6}
{"model": "gpt-4o-2024-08-06", "score": 87.0}
{"model": "chatgpt-4o-latest-24-09-07", "score": 86.5}
{"model": "gpt-4o-2024-05-13", "score": 86.1}
{"model": "claude-3-5-sonnet-20240620", "score": 80.8}
{"model": "grok-2-1212", "score": 75.3}
{"model": "qwen2.5-72b-instruct", "score": 73.9}
{"model": "llama-3.1-405b-instruct", "score": 73.0}
{"model": "gpt-4-turbo-2024-04-09", "score": 78.9}
{"model": "gemini-1.5-flash-exp-0827", "score": 74.5}
{"model": "mistral-large-2", "score": 75.1}
{"model": "gpt-4o-mini-2024-07-18", "score": 75.9}
{"model": "deepseek-v2.5-0908", "score": 70.0}
{"model": "claude-3-opus-20240229", "score": 70.4}
{"model": "meta-llama-3.1-70b-instruct", "score": 64.3}
{"model": "claude-3-5-haiku-20241022", "score": 68.8}
{"model": "gemini-1.5-pro", "score": 68.0}
{"model": "gpt-4-0314", "score": 74.5}