{"model": "o1-2024-12-17", "score": 35.5} {"model": "gemini-exp-1206", "score": 34.1} {"model": "gpt-4-turbo-2024-04-09", "score": 32.1} {"model": "athene-v2-chat", "score": 32.1} {"model": "athene-v2-agent", "score": 31.4} {"model": "gpt-4o-2024-11-20", "score": 31.1} {"model": "gpt-4o-2024-08-06", "score": 30.8} {"model": "qwen2.5-coder-32b-instruct", "score": 30.8} {"model": "claude-3.5-sonnet-20241022", "score": 30.4} {"model": "claude-3.5-haiku-20241022", "score": 30.1} {"model": "claude-3.5-sonnet-20240620", "score": 29.4} {"model": "deepseek-coder-v2-instruct (2024-07-24)", "score": 29.4} {"model": "gemini-1.5-pro-exp-0827", "score": 29.4} {"model": "gemini-exp-1114", "score": 29.4} {"model": "o1-preview-2024-09-12 (temperature=1)", "score": 28.8} {"model": "deepseek-v2-chat (2024-06-28)", "score": 28.7} {"model": "llama-3.3-70b-instruct", "score": 28.4} {"model": "gemini-2.0-flash-exp", "score": 28.1} {"model": "gemini-1.5-pro-exp-0801", "score": 27.4} {"model": "o1-mini-2024-09-12 (temperature=1)", "score": 27.4} {"model": "gemini-exp-1121", "score": 27.4} {"model": "gemini-2.0-flash-thinking-exp-1219", "score": 27.4} {"model": "gpt-4o-2024-05-13", "score": 27.1} {"model": "deepseek-coder-v2-instruct", "score": 27} {"model": "gemini-1.5-pro-002", "score": 26.6} {"model": "grok-beta", "score": 26.6} {"model": "llama-3.1-405b-instruct", "score": 26.4} {"model": "deepseek-v2.5-1210", "score": 26.4} {"model": "deepseek-v2.5", "score": 26.1} {"model": "claude-3-opus-20240229", "score": 26} {"model": "mistral-large-instruct-2407", "score": 26} {"model": "gemini-1.5-pro-api-0514", "score": 25.4} {"model": "llama-3.1-70b-instruct", "score": 25.4} {"model": "qwen2.5-72b-instruct", "score": 25.4} {"model": "gpt-4o-mini-2024-07-18", "score": 25.3} {"model": "llama-3-70b-instruct", "score": 24.6} {"model": "qwen2.5-32b-instruct", "score": 24.6} {"model": "llama-3.1-nemotron-70b-instruct", "score": 24.6} {"model": "dracarys-llama-3.1-70b-instruct", "score": 24.3} {"model": "gemini-1.5-flash-api-0514", "score": 23.6} {"model": "llama-3-70b-synthia-v3.5", "score": 23.6} {"model": "claude-3-sonnet-20240229", "score": 23.4} {"model": "dracarys-72b-instruct", "score": 22.6} {"model": "hermes-2-theta-llama-3-70b", "score": 22.3} {"model": "phi-3.1-mini-128k-instruct", "score": 22} {"model": "hermes-2-pro-llama-3-70b", "score": 21.6} {"model": "gemini-1.5-flash-exp-0827", "score": 21.6} {"model": "qwen2.5-14b-instruct", "score": 20.9} {"model": "qwen2-72b-chat", "score": 20.6} {"model": "codestral-22b-v0.1", "score": 20.6} {"model": "qwen2.5-coder-7b-instruct", "score": 20.3} {"model": "gemma-2-27b-instruct", "score": 20} {"model": "gpt-3.5-turbo-0125", "score": 19.9} {"model": "mixtral-8x22b-instruct", "score": 19.9} {"model": "athene-70b", "score": 19.9} {"model": "deepseek-coder-33b-instruct", "score": 19.3} {"model": "whiterabbitneo-33b-v1.5", "score": 19.3} {"model": "reflectioncoder-ds-33b", "score": 18.9} {"model": "deepseek-v2-chat", "score": 18.6} {"model": "opencoder-8b-instruct", "score": 18.5} {"model": "claude-3-haiku-20240307", "score": 18.3} {"model": "gpt-4-0613", "score": 17.6}