Add Humanity's Last Exam, LiveBench and LiveCodeBench; Remove Codeforces; Update Simple Bench
b605a32
{"model": "gpt-4o", "score": 3.1} | |
{"model": "grok-2", "score": 3.9} | |
{"model": "claude-3-5-sonnet", "score": 4.8} | |
{"model": "gemini-2.0-flash-thinking", "score": 7.2} | |
{"model": "o1", "score": 7.2} |