Spaces:
Running
Running
add new models
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- app.py +3 -1
- leaderboard_data.jsonl +2 -0
- urial_bench/Llama-2-13b-hf.turn1.json +0 -0
- urial_bench/Llama-2-13b-hf.turn2.json +0 -0
- urial_bench/Llama-2-70b-hf.turn1.json +0 -0
- urial_bench/Llama-2-70b-hf.turn2.json +0 -0
- urial_bench/Llama-2-7b-hf.turn1.json +0 -0
- urial_bench/Llama-2-7b-hf.turn2.json +0 -0
- urial_bench/Mistral-7b-v0.1.turn1.json +0 -0
- urial_bench/Mistral-7b-v0.1.turn2.json +0 -0
- urial_bench/Mistral-7b-v0.2-URIAL-0210v1.jsonl +0 -0
- urial_bench/Mistral-7b-v0.2.turn1.json +0 -0
- urial_bench/Mistral-7b-v0.2.turn2.json +0 -0
- urial_bench/Mixtral-8x7B-v0.1.turn1.json +0 -0
- urial_bench/Mixtral-8x7B-v0.1.turn2.json +0 -0
- urial_bench/Yi-34B.turn1.json +0 -0
- urial_bench/Yi-34B.turn2.json +0 -0
- urial_bench/Yi-6B.turn1.json +0 -0
- urial_bench/Yi-6B.turn2.json +0 -0
- urial_bench/amber.turn1.json +0 -0
- urial_bench/amber.turn2.json +0 -0
- urial_bench/dbrx-URIAL-0210v1.jsonl +0 -0
- urial_bench/dbrx.turn1.json +0 -0
- urial_bench/dbrx.turn2.json +0 -0
- urial_bench/falcon-7b.turn1.json +0 -0
- urial_bench/falcon-7b.turn2.json +0 -0
- urial_bench/gemma-2b.turn1.json +0 -0
- urial_bench/gemma-2b.turn2.json +0 -0
- urial_bench/gemma-7b.turn1.json +0 -0
- urial_bench/gemma-7b.turn2.json +0 -0
- urial_bench/mpt-7b.turn1.json +0 -0
- urial_bench/mpt-7b.turn2.json +0 -0
- urial_bench/olmo-7b-vllm.turn1.json +0 -0
- urial_bench/olmo-7b-vllm.turn2.json +0 -0
- urial_bench/olmo.turn1.0-20.json +0 -0
- urial_bench/olmo.turn1.20-40.json +0 -0
- urial_bench/olmo.turn1.40-60.json +0 -0
- urial_bench/olmo.turn1.60-80.json +0 -0
- urial_bench/olmo.turn1.json +0 -0
- urial_bench/olmo.turn2.0-20.json +0 -0
- urial_bench/olmo.turn2.20-40.json +0 -0
- urial_bench/olmo.turn2.40-60.json +0 -0
- urial_bench/olmo.turn2.60-80.json +0 -0
- urial_bench/olmo.turn2.json +0 -0
- urial_bench/phi-2-vllm.turn1.json +0 -0
- urial_bench/phi-2-vllm.turn2.json +0 -0
- urial_bench/phi-2.turn1.0-20.json +0 -0
- urial_bench/phi-2.turn1.20-40.json +0 -0
- urial_bench/phi-2.turn1.40-60.json +0 -0
- urial_bench/phi-2.turn1.60-80.json +0 -0
app.py
CHANGED
@@ -41,7 +41,8 @@ model_info = {
|
|
41 |
"Llama-2-13b-hf": {"hf_name": "meta-llama/Llama-2-13b-hf", "pretty_name": "Llama-2-13B"},
|
42 |
"Llama-2-7b-hf": {"hf_name": "meta-llama/Llama-2-7b-hf", "pretty_name": "Llama-2-7B"},
|
43 |
"Mixtral-8x7B-v0.1": {"hf_name": "mistralai/Mixtral-8x7B-v0.1", "pretty_name": "Mixtral-8x7B"},
|
44 |
-
"Mistral-7b-v0.1": {"hf_name": "mistralai/Mistral-7B-v0.1", "pretty_name": "Mistral-7B"},
|
|
|
45 |
"Yi-34B": {"hf_name": "01-ai/Yi-34B", "pretty_name": "Yi-34B"},
|
46 |
"Yi-6B": {"hf_name": "01-ai/Yi-6B", "pretty_name": "Yi-6B"},
|
47 |
"gemma-7b": {"hf_name": "google/gemma-7b", "pretty_name": "Gemma-7B"},
|
@@ -53,6 +54,7 @@ model_info = {
|
|
53 |
"falcon-7b": {"hf_name": "microsoft/falcon-7b", "pretty_name": "Falcon-7B"},
|
54 |
"mpt-7b": {"hf_name": "mosaicml/mpt-7b", "pretty_name": "MPT-7B"},
|
55 |
"amber": {"hf_name": "LLM360/Amber", "pretty_name": "Amber (7B)"},
|
|
|
56 |
}
|
57 |
|
58 |
|
|
|
41 |
"Llama-2-13b-hf": {"hf_name": "meta-llama/Llama-2-13b-hf", "pretty_name": "Llama-2-13B"},
|
42 |
"Llama-2-7b-hf": {"hf_name": "meta-llama/Llama-2-7b-hf", "pretty_name": "Llama-2-7B"},
|
43 |
"Mixtral-8x7B-v0.1": {"hf_name": "mistralai/Mixtral-8x7B-v0.1", "pretty_name": "Mixtral-8x7B"},
|
44 |
+
"Mistral-7b-v0.1": {"hf_name": "mistralai/Mistral-7B-v0.1", "pretty_name": "Mistral-7B v0.1"},
|
45 |
+
"Mistral-7b-v0.2": {"hf_name": "mistralai/Mistral-7B-v0.1", "pretty_name": "Mistral-7B v0.2"},
|
46 |
"Yi-34B": {"hf_name": "01-ai/Yi-34B", "pretty_name": "Yi-34B"},
|
47 |
"Yi-6B": {"hf_name": "01-ai/Yi-6B", "pretty_name": "Yi-6B"},
|
48 |
"gemma-7b": {"hf_name": "google/gemma-7b", "pretty_name": "Gemma-7B"},
|
|
|
54 |
"falcon-7b": {"hf_name": "microsoft/falcon-7b", "pretty_name": "Falcon-7B"},
|
55 |
"mpt-7b": {"hf_name": "mosaicml/mpt-7b", "pretty_name": "MPT-7B"},
|
56 |
"amber": {"hf_name": "LLM360/Amber", "pretty_name": "Amber (7B)"},
|
57 |
+
"dbrx": {"hf_name": "databricks/dbrx-base", "pretty_name": "DBRX-base"},
|
58 |
}
|
59 |
|
60 |
|
leaderboard_data.jsonl
CHANGED
@@ -1,9 +1,11 @@
|
|
1 |
{"model": "gpt-4", "Turn 1": 8.95625, "Turn 2": 9.025, "Overall": 8.990625, "coding": 8.55, "extraction": 9.375, "humanities": 9.95, "math": 6.8, "reasoning": 9.0, "roleplay": 8.9, "stem": 9.7, "writing": 9.65}
|
2 |
{"model": "gpt-3.5-turbo", "Turn 1": 8.075, "Turn 2": 7.8125, "Overall": 7.94375, "coding": 6.9, "extraction": 8.85, "humanities": 9.55, "math": 6.3, "reasoning": 5.65, "roleplay": 8.4, "stem": 8.7, "writing": 9.2}
|
|
|
3 |
{"model": "Llama-2-70b-hf", "Turn 1": 7.60625, "Turn 2": 6.6125, "Overall": 7.109375, "coding": 4.15, "extraction": 7.7, "humanities": 9.75, "math": 3.6, "reasoning": 6.1, "roleplay": 7.325, "stem": 8.75, "writing": 9.5}
|
4 |
{"model": "Mixtral-8x7B-v0.1", "Turn 1": 7.69375, "Turn 2": 6.1875, "Overall": 6.940625, "coding": 5.3, "extraction": 7.05, "humanities": 9.2, "math": 4.85, "reasoning": 5.3, "roleplay": 7.4, "stem": 8.225, "writing": 8.2}
|
5 |
{"model": "Mistral-7b-v0.1", "Turn 1": 7.4875, "Turn 2": 5.8625, "Overall": 6.675, "coding": 4.6, "extraction": 7.75, "humanities": 9.075, "math": 3.4, "reasoning": 4.9, "roleplay": 7.65, "stem": 8.275, "writing": 7.75}
|
6 |
{"model": "Yi-34B", "Turn 1": 7.19375, "Turn 2": 6.15625, "Overall": 6.675, "coding": 3.85, "extraction": 6.8, "humanities": 8.475, "math": 4.8, "reasoning": 6.0, "roleplay": 7.75, "stem": 7.825, "writing": 7.9}
|
|
|
7 |
{"model": "phi-2-vllm", "Turn 1": 7.16875, "Turn 2": 4.936708860759493, "Overall": 6.059748427672956, "coding": 4.55, "extraction": 5.3, "humanities": 8.65, "math": 3.35, "reasoning": 5.5, "roleplay": 6.625, "stem": 7.105263157894737, "writing": 7.45}
|
8 |
{"model": "gemma-7b", "Turn 1": 6.96875, "Turn 2": 5.0375, "Overall": 6.003125, "coding": 3.95, "extraction": 6.25, "humanities": 8.825, "math": 4.35, "reasoning": 4.5, "roleplay": 6.25, "stem": 7.25, "writing": 6.65}
|
9 |
{"model": "phi-2", "Turn 1": 7.0375, "Turn 2": 4.6625, "Overall": 5.85, "coding": 4.25, "extraction": 4.45, "humanities": 8.85, "math": 3.8, "reasoning": 4.55, "roleplay": 7.2, "stem": 7.0, "writing": 6.7}
|
|
|
1 |
{"model": "gpt-4", "Turn 1": 8.95625, "Turn 2": 9.025, "Overall": 8.990625, "coding": 8.55, "extraction": 9.375, "humanities": 9.95, "math": 6.8, "reasoning": 9.0, "roleplay": 8.9, "stem": 9.7, "writing": 9.65}
|
2 |
{"model": "gpt-3.5-turbo", "Turn 1": 8.075, "Turn 2": 7.8125, "Overall": 7.94375, "coding": 6.9, "extraction": 8.85, "humanities": 9.55, "math": 6.3, "reasoning": 5.65, "roleplay": 8.4, "stem": 8.7, "writing": 9.2}
|
3 |
+
{"model": "dbrx", "Turn 1": 8.0375, "Turn 2": 6.4, "Overall": 7.21875, "coding": 4.7, "extraction": 7.65, "humanities": 9.325, "math": 5.9, "reasoning": 5.9, "roleplay": 7.925, "stem": 8.425, "writing": 7.925}
|
4 |
{"model": "Llama-2-70b-hf", "Turn 1": 7.60625, "Turn 2": 6.6125, "Overall": 7.109375, "coding": 4.15, "extraction": 7.7, "humanities": 9.75, "math": 3.6, "reasoning": 6.1, "roleplay": 7.325, "stem": 8.75, "writing": 9.5}
|
5 |
{"model": "Mixtral-8x7B-v0.1", "Turn 1": 7.69375, "Turn 2": 6.1875, "Overall": 6.940625, "coding": 5.3, "extraction": 7.05, "humanities": 9.2, "math": 4.85, "reasoning": 5.3, "roleplay": 7.4, "stem": 8.225, "writing": 8.2}
|
6 |
{"model": "Mistral-7b-v0.1", "Turn 1": 7.4875, "Turn 2": 5.8625, "Overall": 6.675, "coding": 4.6, "extraction": 7.75, "humanities": 9.075, "math": 3.4, "reasoning": 4.9, "roleplay": 7.65, "stem": 8.275, "writing": 7.75}
|
7 |
{"model": "Yi-34B", "Turn 1": 7.19375, "Turn 2": 6.15625, "Overall": 6.675, "coding": 3.85, "extraction": 6.8, "humanities": 8.475, "math": 4.8, "reasoning": 6.0, "roleplay": 7.75, "stem": 7.825, "writing": 7.9}
|
8 |
+
{"model": "Mistral-7b-v0.2", "Turn 1": 6.9875, "Turn 2": 5.55, "Overall": 6.26875, "coding": 3.8, "extraction": 7.45, "humanities": 8.95, "math": 3.35, "reasoning": 4.5, "roleplay": 6.7, "stem": 7.425, "writing": 7.975}
|
9 |
{"model": "phi-2-vllm", "Turn 1": 7.16875, "Turn 2": 4.936708860759493, "Overall": 6.059748427672956, "coding": 4.55, "extraction": 5.3, "humanities": 8.65, "math": 3.35, "reasoning": 5.5, "roleplay": 6.625, "stem": 7.105263157894737, "writing": 7.45}
|
10 |
{"model": "gemma-7b", "Turn 1": 6.96875, "Turn 2": 5.0375, "Overall": 6.003125, "coding": 3.95, "extraction": 6.25, "humanities": 8.825, "math": 4.35, "reasoning": 4.5, "roleplay": 6.25, "stem": 7.25, "writing": 6.65}
|
11 |
{"model": "phi-2", "Turn 1": 7.0375, "Turn 2": 4.6625, "Overall": 5.85, "coding": 4.25, "extraction": 4.45, "humanities": 8.85, "math": 3.8, "reasoning": 4.55, "roleplay": 7.2, "stem": 7.0, "writing": 6.7}
|
urial_bench/Llama-2-13b-hf.turn1.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/Llama-2-13b-hf.turn2.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/Llama-2-70b-hf.turn1.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/Llama-2-70b-hf.turn2.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/Llama-2-7b-hf.turn1.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/Llama-2-7b-hf.turn2.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/Mistral-7b-v0.1.turn1.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/Mistral-7b-v0.1.turn2.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/Mistral-7b-v0.2-URIAL-0210v1.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/Mistral-7b-v0.2.turn1.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/Mistral-7b-v0.2.turn2.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/Mixtral-8x7B-v0.1.turn1.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/Mixtral-8x7B-v0.1.turn2.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/Yi-34B.turn1.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/Yi-34B.turn2.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/Yi-6B.turn1.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/Yi-6B.turn2.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/amber.turn1.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/amber.turn2.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/dbrx-URIAL-0210v1.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/dbrx.turn1.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/dbrx.turn2.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/falcon-7b.turn1.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/falcon-7b.turn2.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/gemma-2b.turn1.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/gemma-2b.turn2.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/gemma-7b.turn1.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/gemma-7b.turn2.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/mpt-7b.turn1.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/mpt-7b.turn2.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/olmo-7b-vllm.turn1.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/olmo-7b-vllm.turn2.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/olmo.turn1.0-20.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/olmo.turn1.20-40.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/olmo.turn1.40-60.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/olmo.turn1.60-80.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/olmo.turn1.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/olmo.turn2.0-20.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/olmo.turn2.20-40.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/olmo.turn2.40-60.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/olmo.turn2.60-80.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/olmo.turn2.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/phi-2-vllm.turn1.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/phi-2-vllm.turn2.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/phi-2.turn1.0-20.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/phi-2.turn1.20-40.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/phi-2.turn1.40-60.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
urial_bench/phi-2.turn1.60-80.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|