rzanoli committed on
Commit
c8225f5
·
1 Parent(s): 67324c2

Add charts

Browse files
Files changed (1) hide show
  1. app.py +73 -0
app.py CHANGED
@@ -11,6 +11,71 @@ from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REP
11
  from src.populate import get_evaluation_queue_df, get_leaderboard_df
12
  from src.submission.submit import add_new_eval
13
  import random
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  # Define task metadata (icons, names, descriptions)
16
  TASK_METADATA_MULTIPLECHOICE = {
@@ -79,6 +144,9 @@ def update_task_leaderboard(dataframe, default_selection=None, hidden_columns=No
79
 
80
  sorted_dataframe = dataframe.sort_values(by="Combined Performance", ascending=False)
81
 
 
 
 
82
  #print(sorted_dataframe['Combined Performance'])
83
 
84
  field_list = fields(AutoEvalColumn)
@@ -178,6 +246,11 @@ with demo:
178
  hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['FS', 'Model', "Avg. Combined Performance ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]]
179
  )
180
 
 
 
 
 
 
181
  # About tab
182
  with gr.TabItem("📝 About"):
183
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
11
  from src.populate import get_evaluation_queue_df, get_leaderboard_df
12
  from src.submission.submit import add_new_eval
13
  import random
14
+ import matplotlib.pyplot as plt
15
+ import re
16
+ import plotly.express as px
17
+ import plotly.graph_objects as go
18
+
19
+
20
def line_chart(dataframe):
    """Build an interactive scatter plot of model performance vs. model size.

    Plots "Avg. Combined Performance ⬆️" against "#Params (B)", with one
    trace for few-shot rows (``IS_FS == True``, red, "5-Few-Shot") and one
    for zero-shot rows (``IS_FS == False``, blue, "0-Shot").  Model names
    are extracted from the HTML anchor stored in the "Model" column and
    shown only on hover.

    Args:
        dataframe: leaderboard DataFrame with at least the columns
            'IS_FS', '#Params (B)', 'Avg. Combined Performance ⬆️'
            and 'Model' (the latter holding an HTML link).

    Returns:
        A ``plotly.graph_objects.Figure`` with zoom/pan disabled
        (hover remains active).
    """

    def _model_label(cell):
        # The 'Model' column holds HTML like '<a href=...>org/model</a>';
        # keep only the trailing model name.  Fall back to the raw cell
        # text when the pattern does not match, instead of crashing on
        # `.group()` of None.
        match = re.search(r'>([^<>/]+/[^<>]+)<', cell)
        return match.group(1).split('/')[-1] if match else cell

    def _trace(subset, name, color):
        # One scatter trace per few-shot setting; markers only, with the
        # model name carried in customdata so it appears on hover.
        return go.Scatter(
            x=subset['#Params (B)'].tolist(),
            y=subset['Avg. Combined Performance ⬆️'].tolist(),
            mode='markers',
            name=name,
            marker=dict(color=color, size=10),
            hovertemplate='<b>%{customdata}</b><br>#Params: %{x}<br>Performance: %{y}<extra></extra>',
            customdata=[_model_label(m) for m in subset['Model'].tolist()],
        )

    fig = go.Figure()
    fig.add_trace(_trace(dataframe[dataframe['IS_FS'] == True], '5-Few-Shot', 'red'))
    fig.add_trace(_trace(dataframe[dataframe['IS_FS'] == False], '0-Shot', 'blue'))

    fig.update_layout(
        title="Avg. Combined Performance vs #Params",
        xaxis_title="#Params (B)",
        yaxis_title="Avg. Combined Performance ⬆️",
        template="plotly_white",
        hovermode="closest",
        dragmode=False,
    )

    # Disable zoom and other range controls so the chart behaves as a
    # static-but-hoverable view inside the Gradio tab.
    fig.update_xaxes(fixedrange=True, rangeslider_visible=False)
    fig.update_yaxes(fixedrange=True)

    return fig
78
+
79
 
80
  # Define task metadata (icons, names, descriptions)
81
  TASK_METADATA_MULTIPLECHOICE = {
 
144
 
145
  sorted_dataframe = dataframe.sort_values(by="Combined Performance", ascending=False)
146
 
147
+ pd.set_option('display.max_colwidth', None)
148
+ #print("========================", dataframe['Model'])
149
+
150
  #print(sorted_dataframe['Combined Performance'])
151
 
152
  field_list = fields(AutoEvalColumn)
 
246
  hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['FS', 'Model', "Avg. Combined Performance ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]]
247
  )
248
 
249
+ with gr.TabItem("Charts"):
250
+ #gr.Plot(value=line_chart(LEADERBOARD_DF), label="Andamento di esempio")
251
+ #gr.Plot(value=line_chart_interactive_test(), label="Andamento interattivo")
252
+ gr.Plot(value=line_chart(LEADERBOARD_DF))
253
+
254
  # About tab
255
  with gr.TabItem("📝 About"):
256
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")