{"model": "o1-2024-12-17", "score": 60.0} {"model": "o3-mini-2025-01-31", "score": 42.8} {"model": "deepseek-r1", "score": 28.7} {"model": "o1-mini-2024-09-12", "score": 18.8} {"model": "gemini-2.0-flash-thinking-exp-01-21", "score": 15.2} {"model": "qwen2.5-max", "score": 13.8} {"model": "llama-3.1-405b-instruct", "score": 13.2}