Add charts
Browse files
app.py
CHANGED
@@ -11,6 +11,71 @@ from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REP
|
|
11 |
from src.populate import get_evaluation_queue_df, get_leaderboard_df
|
12 |
from src.submission.submit import add_new_eval
|
13 |
import random
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
# Define task metadata (icons, names, descriptions)
|
16 |
TASK_METADATA_MULTIPLECHOICE = {
|
@@ -79,6 +144,9 @@ def update_task_leaderboard(dataframe, default_selection=None, hidden_columns=No
|
|
79 |
|
80 |
sorted_dataframe = dataframe.sort_values(by="Combined Performance", ascending=False)
|
81 |
|
|
|
|
|
|
|
82 |
#print(sorted_dataframe['Combined Performance'])
|
83 |
|
84 |
field_list = fields(AutoEvalColumn)
|
@@ -178,6 +246,11 @@ with demo:
|
|
178 |
hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['FS', 'Model', "Avg. Combined Performance ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]]
|
179 |
)
|
180 |
|
|
|
|
|
|
|
|
|
|
|
181 |
# About tab
|
182 |
with gr.TabItem("📝 About"):
|
183 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
|
|
11 |
from src.populate import get_evaluation_queue_df, get_leaderboard_df
|
12 |
from src.submission.submit import add_new_eval
|
13 |
import random
|
14 |
+
import matplotlib.pyplot as plt
|
15 |
+
import re
|
16 |
+
import plotly.express as px
|
17 |
+
import plotly.graph_objects as go
|
18 |
+
|
19 |
+
|
20 |
+
# Captures the "org/name" text node of an HTML anchor, e.g. ">org/model-7b<".
_MODEL_LINK_RE = re.compile(r'>([^<>/]+/[^<>]+)<')
# Used only by the fallback path to drop markup from a cell that did not match.
_HTML_TAG_RE = re.compile(r'<[^>]*>')

# Shared by both traces so the hover box stays identical for the two series.
_HOVER_TEMPLATE = '<b>%{customdata}</b><br>#Params: %{x}<br>Performance: %{y}<extra></extra>'


def _short_model_name(model_cell):
    """Return the short model name (the part after the last '/') for a 'Model' cell."""
    text = str(model_cell)
    match = _MODEL_LINK_RE.search(text)
    if match is not None:
        return match.group(1).split('/')[-1]
    # The cell is not an "<a ...>org/name</a>" link. Previously this raised
    # AttributeError on `None.group`; instead fall back to the visible text.
    plain = _HTML_TAG_RE.sub('', text).strip()
    return plain.split('/')[-1] if plain else text


def _add_marker_trace(fig, subset, name, color):
    """Add one marker-only scatter trace built from the rows of *subset*."""
    fig.add_trace(go.Scatter(
        x=subset['#Params (B)'].tolist(),
        y=subset['Avg. Combined Performance ⬆️'].tolist(),
        mode='markers',  # markers only, no text drawn on the plot
        name=name,
        marker=dict(color=color, size=10),
        hovertemplate=_HOVER_TEMPLATE,
        # The model label is shown only on hover, via %{customdata}.
        customdata=[_short_model_name(m) for m in subset['Model'].tolist()],
    ))


def line_chart(dataframe):
    """Build a scatter plot of average combined performance against parameter count.

    Despite the name, the figure contains markers only (no connecting lines).
    Rows are split on the ``IS_FS`` column into two series: ``IS_FS == True``
    is drawn in red as '5-Few-Shot' and ``IS_FS == False`` in blue as '0-Shot'.

    Args:
        dataframe: pandas DataFrame providing the columns ``IS_FS``,
            ``#Params (B)``, ``Avg. Combined Performance ⬆️`` and ``Model``.
            ``Model`` cells are presumably HTML links whose text is
            ``org/name`` (TODO confirm against the code that builds the
            leaderboard); cells in any other form are labelled with their
            visible text instead of raising.

    Returns:
        A ``plotly.graph_objects.Figure`` with zooming and dragging disabled.
    """
    fig = go.Figure()

    # `== flag` is an element-wise pandas comparison, not an identity check;
    # rows whose IS_FS is neither True nor False end up in neither series.
    series = (
        (True, '5-Few-Shot', 'red'),
        (False, '0-Shot', 'blue'),
    )
    for flag, name, color in series:
        _add_marker_trace(fig, dataframe[dataframe['IS_FS'] == flag], name, color)

    fig.update_layout(
        title="Avg. Combined Performance vs #Params",
        xaxis_title="#Params (B)",
        yaxis_title="Avg. Combined Performance ⬆️",
        template="plotly_white",
        hovermode="closest",
        dragmode=False
    )

    # Disable zoom and other range controls on both axes.
    fig.update_xaxes(fixedrange=True, rangeslider_visible=False)
    fig.update_yaxes(fixedrange=True)

    return fig
|
78 |
+
|
79 |
|
80 |
# Define task metadata (icons, names, descriptions)
|
81 |
TASK_METADATA_MULTIPLECHOICE = {
|
|
|
144 |
|
145 |
sorted_dataframe = dataframe.sort_values(by="Combined Performance", ascending=False)
|
146 |
|
147 |
+
pd.set_option('display.max_colwidth', None)
|
148 |
+
#print("========================", dataframe['Model'])
|
149 |
+
|
150 |
#print(sorted_dataframe['Combined Performance'])
|
151 |
|
152 |
field_list = fields(AutoEvalColumn)
|
|
|
246 |
hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['FS', 'Model', "Avg. Combined Performance ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]]
|
247 |
)
|
248 |
|
249 |
+
with gr.TabItem("Charts"):
|
250 |
+
#gr.Plot(value=line_chart(LEADERBOARD_DF), label="Andamento di esempio")
|
251 |
+
#gr.Plot(value=line_chart_interactive_test(), label="Andamento interattivo")
|
252 |
+
gr.Plot(value=line_chart(LEADERBOARD_DF))
|
253 |
+
|
254 |
# About tab
|
255 |
with gr.TabItem("📝 About"):
|
256 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|