Commit
·
23d64a1
1
Parent(s):
6d9eef2
let's add the tsv file
Browse files
FACTS.tsv
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model size Separate Grounding Score Separate Quality Score Combined Score
|
2 |
+
deepseek-ai/DeepSeek-R1-Distill-Qwen-14B 14 0.817797 0.542373 0.457627
|
3 |
+
VIDraft/Gemma-3-R1984-27B 27 0.93617 0.459574 0.434043
|
4 |
+
meta-llama/Llama-3.3-70B-Instruct 70 0.842553 0.510638 0.425532
|
5 |
+
Qwen/Qwen3-30B-A3B 30 0.812766 0.540426 0.425532
|
6 |
+
Qwen/Qwen3-4B 4 0.770213 0.540426 0.425532
|
7 |
+
google/medgemma-27b-text-it 27 0.940678 0.440678 0.419492
|
8 |
+
Qwen/Qwen3-32B 32 0.740426 0.553191 0.417021
|
9 |
+
deepseek-ai/DeepSeek-R1-0528-Qwen3-8B 8 0.834746 0.478814 0.411017
|
10 |
+
deepseek-ai/DeepSeek-R1-Distill-Llama-8B 8 0.766949 0.516949 0.40678
|
11 |
+
Qwen/Qwen3-8B 8 0.748936 0.523404 0.4
|
12 |
+
Qwen/Qwen3-14B 14 0.778723 0.502128 0.382979
|
13 |
+
google/gemma-3-27b-it 27 0.936 0.391 0.378
|
14 |
+
google/medgemma-4b-it 4 0.834746 0.423729 0.377119
|
15 |
+
Qwen/Qwen2.5-VL-32B-Instruct 32 0.621277 0.570213 0.357447
|
16 |
+
meta-llama/Llama-3.1-70B-Instruct 70 0.855932 0.389831 0.334746
|
17 |
+
google/gemma-3-12b-it 12 0.944 0.343 0.313
|
18 |
+
google/gemma-3-4b-it 4 0.9 0.33 0.3
|
19 |
+
Qwen/Qwen3-1.7B 1.7 0.702128 0.451064 0.297872
|
20 |
+
deepseek-ai/DeepSeek-R1-Distill-Qwen-7B 7 0.59322 0.449153 0.275424
|
21 |
+
Qwen/Qwen3-0.6B 0.6 0.682203 0.330508 0.266949
|
22 |
+
Qwen/Qwen2.5-7B-Instruct 7 0.731915 0.310638 0.255319
|
23 |
+
Qwen/Qwen2.5-14B-Instruct-1M 14 0.70339 0.300847 0.254237
|
24 |
+
nvidia/Llama-Nemotron-Nano-8B 8 0.576271 0.402542 0.241525
|
25 |
+
OpenScholar/Llama-3.1-OpenScholar-8B 8 0.690678 0.283898 0.241525
|
26 |
+
Qwen/Qwen2.5-7B-Instruct-1M 7 0.737288 0.271186 0.207627
|
27 |
+
nvidia/Llama-Nemotron-Nano-4B-v1.1 4 0.548936 0.340426 0.2
|
28 |
+
google/gemma-3-1b-it 1 0.65 0.28 0.19
|
29 |
+
mistralai/Ministral-8B-Instruct-2410 8 0.94 0.184 0.175
|
30 |
+
meta-llama/Llama-3.1-8B-Instruct 8 0.665254 0.194915 0.169492
|
31 |
+
mistralai/Mistral-Small-3.1-24B-Instruct-2503 24 0.953191 0.165957 0.157447
|
32 |
+
mistralai/Mistral-Small-24B-Instruct-2501 24 0.95339 0.135593 0.131356
|
33 |
+
open-thoughts/OpenThinker-7B 7 0.478814 0.152542 0.110169
|
34 |
+
PleIAs/Pleias-RAG-350M 0.35 0.236264 0.021978 0.010989
|
35 |
+
PleIAs/Pleias-RAG-1B 1 0.190476 0.037037 0
|
app.py
CHANGED
@@ -1,23 +1,16 @@
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
4 |
-
import requests
|
5 |
from io import StringIO
|
6 |
import os
|
7 |
|
8 |
-
dropbox_url = os.getenv("FACTS_RES_URL")
|
9 |
-
|
10 |
try:
|
11 |
-
#
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
# Read the TSV data
|
16 |
-
df = pd.read_csv(StringIO(response.text), sep='\t')
|
17 |
-
print(f"Successfully loaded {len(df)} models from Dropbox")
|
18 |
except Exception as e:
|
19 |
-
print(f"Error loading data from
|
20 |
-
# Show sample data when
|
21 |
df = pd.DataFrame({
|
22 |
'model': [
|
23 |
'deepseek-ai/DeepSeek-R1-Distill-Qwen-14B',
|
@@ -30,7 +23,7 @@ except Exception as e:
|
|
30 |
'Separate Quality Score': [0.542373, 0.510638, 0.540426, 0.391],
|
31 |
'Combined Score': [0.457627, 0.425532, 0.425532, 0.378]
|
32 |
})
|
33 |
-
print("Showing sample data (
|
34 |
|
35 |
# Clean up the data
|
36 |
df = df.dropna() # Remove any rows with missing values
|
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
|
|
4 |
from io import StringIO
|
5 |
import os
|
6 |
|
|
|
|
|
7 |
try:
|
8 |
+
# Read the local TSV file
|
9 |
+
df = pd.read_csv("FACTS.tsv", sep='\t')
|
10 |
+
print(f"Successfully loaded {len(df)} models from local file")
|
|
|
|
|
|
|
|
|
11 |
except Exception as e:
|
12 |
+
print(f"Error loading data from local file: {e}")
|
13 |
+
# Show sample data when file reading fails
|
14 |
df = pd.DataFrame({
|
15 |
'model': [
|
16 |
'deepseek-ai/DeepSeek-R1-Distill-Qwen-14B',
|
|
|
23 |
'Separate Quality Score': [0.542373, 0.510638, 0.540426, 0.391],
|
24 |
'Combined Score': [0.457627, 0.425532, 0.425532, 0.378]
|
25 |
})
|
26 |
+
print("Showing sample data (file read failed)")
|
27 |
|
28 |
# Clean up the data
|
29 |
df = df.dropna() # Remove any rows with missing values
|