{"model": "o1-2024-12-17", "score": 81.0} {"model": "o1-preview-2024-09-12", "score": 71.4} {"model": "o1-mini-2024-09-12", "score": 52.6} {"model": "claude-3-5-sonnet-20241022", "score": 36.2} {"model": "gemini-1.5-pro-exp-0827", "score": 30.5} {"model": "gpt-4o-2024-08-06", "score": 31.7} {"model": "chatgpt-4o-latest-24-09-07", "score": 29.9} {"model": "gpt-4o-2024-05-13", "score": 28.2} {"model": "claude-3-5-sonnet-20240620", "score": 33.4} {"model": "grok-2-1212", "score": 27.7} {"model": "qwen2.5-72b-instruct", "score": 26.6} {"model": "llama-3.1-405b-instruct", "score": 30.1} {"model": "gpt-4-turbo-2024-04-09", "score": 28.4} {"model": "gemini-1.5-flash-exp-0827", "score": 25.0} {"model": "mistral-large-2", "score": 29.0} {"model": "gpt-4o-mini-2024-07-18", "score": 20.1} {"model": "deepseek-v2.5-0908", "score": 22.1} {"model": "claude-3-opus-20240229", "score": 27.0} {"model": "meta-llama-3.1-70b-instruct", "score": 24.9} {"model": "claude-3-5-haiku-20241022", "score": 18.7} {"model": "gemini-1.5-pro", "score": 19.4} {"model": "gpt-4-0314", "score": 27.1}