{"model": "o1-2024-12-17 (high)", "score": 73.1} {"model": "o3-mini-2025-01-31 (high)", "score": 71.6} {"model": "o3-mini-2025-01-31 (medium)", "score": 68.8} {"model": "o1-2024-12-17 (medium)", "score": 65.4} {"model": "deepseek-r1-preview", "score": 64.3} {"model": "o1-2024-12-17 (low)", "score": 62.7} {"model": "o3-mini-2025-01-31 (low)", "score": 62.7} {"model": "o1-mini-2024-09-12", "score": 54.1} {"model": "deepseek-r1-lite-preview", "score": 50.4} {"model": "gemini-flash-2.0-thinking-01-21", "score": 45} {"model": "qwq-32b-preview", "score": 44} {"model": "gemini-flash-2.0-thinking-12-19", "score": 43.4} {"model": "o1-preview-2024-09-12", "score": 42.5} {"model": "claude-3.5-sonnet-20241022", "score": 37.1} {"model": "deepseek-v3", "score": 36.3} {"model": "gpt-4o-2024-05-13", "score": 33} {"model": "claude-3.5-sonnet-20240620", "score": 32} {"model": "gemini-flash-2.0-exp", "score": 32} {"model": "gemini-pro-1.5-002", "score": 30.9} {"model": "gpt-4o-2024-08-06", "score": 30.5} {"model": "gpt-4-turbo-2024-04-09", "score": 29.6} {"model": "gemini-flash-1.5-002", "score": 28.4} {"model": "gpt-4o-mini-2024-07-18", "score": 27.7} {"model": "mistral-large", "score": 27.6} {"model": "codestral-latest", "score": 23.8} {"model": "claude-3-haiku", "score": 17.1}