Spaces:
Running
Running
Use real results dataset
#1
by
sergiopaniego
HF Staff
- opened
- app.py +3 -3
- data_utils.py +13 -13
app.py
CHANGED
@@ -6,11 +6,11 @@ from data_utils import *
|
|
6 |
|
7 |
from datasets import load_dataset
|
8 |
|
9 |
-
# Dummy
|
10 |
ds = load_dataset("visionLMsftw/vibe-testing-samples", split="train")
|
11 |
-
models = get_model_names()
|
12 |
evaluation_data = get_evaluation_data(ds)
|
13 |
-
|
|
|
|
|
14 |
|
15 |
|
16 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
|
6 |
|
7 |
from datasets import load_dataset
|
8 |
|
|
|
9 |
ds = load_dataset("visionLMsftw/vibe-testing-samples", split="train")
|
|
|
10 |
evaluation_data = get_evaluation_data(ds)
|
11 |
+
ds_results = load_dataset("visionLMsftw/vibe-testing-results", split="train")
|
12 |
+
models = get_model_names(ds_results)
|
13 |
+
responses = get_responses(ds_results)
|
14 |
|
15 |
|
16 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
data_utils.py
CHANGED
@@ -29,19 +29,19 @@ def get_evaluation_data(ds):
|
|
29 |
})
|
30 |
return evaluation_data
|
31 |
|
32 |
-
def get_model_names():
|
33 |
-
models = [
|
34 |
return models
|
35 |
|
36 |
-
def get_responses():
|
37 |
-
responses = {
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
}
|
46 |
-
|
47 |
return responses
|
|
|
29 |
})
|
30 |
return evaluation_data
|
31 |
|
32 |
+
def get_model_names(ds_results):
    """Return the distinct model identifiers found in the results data.

    Args:
        ds_results: Object whose ``"model_id"`` column, read as
            ``ds_results['model_id']``, is an iterable of hashable model ids.

    Returns:
        A list with each model id exactly once, in order of first appearance.
    """
    # dict.fromkeys de-duplicates while keeping insertion order. Going through
    # a set instead gives string ids in an order that can differ from one
    # interpreter run to the next (hash randomization).
    models = list(dict.fromkeys(ds_results['model_id']))
    return models
|
35 |
|
36 |
+
def get_responses(ds_results):
    """Group model responses by model id.

    Args:
        ds_results: Iterable of row mappings, each providing a ``"model_id"``
            and a ``"model_response"`` key.

    Returns:
        A dict mapping each model id to ``{index: response}``, where ``index``
        is the 0-based position of that response among the model's own rows,
        in row order.
    """
    responses = {}

    # Single pass over the rows: each row is appended to its model's mapping,
    # rather than rescanning the whole dataset once per distinct model.
    for row in ds_results:
        per_model = responses.setdefault(row["model_id"], {})
        # The next free index equals the number of responses stored so far,
        # which matches enumerating the model's responses in row order.
        per_model[len(per_model)] = row["model_response"]

    return responses
|