rzanoli committed on
Commit
07ad260
·
1 Parent(s): 496735b

Add the results of GPT-4o for NER, REL and LS

Browse files
Files changed (1) hide show
  1. app.py +22 -4
app.py CHANGED
@@ -118,7 +118,7 @@ def barplot_mean_few_minus_zero_shot(dataframe, tasks=None):
118
  return fig
119
 
120
 
121
- def boxplot_per_task(dataframe=None, baselines=None):
122
 
123
  #print(dataframe.columns)
124
 
@@ -176,6 +176,16 @@ def boxplot_per_task(dataframe=None, baselines=None):
176
  )
177
  '''
178
 
 
 
 
 
 
 
 
 
 
 
179
  fig.update_layout(
180
  title="Distribution of Model Accuracy by Task",
181
  xaxis_title="Task",
@@ -190,7 +200,8 @@ def boxplot_per_task(dataframe=None, baselines=None):
190
  fig.add_annotation(
191
  text=(
192
  "In tasks like TE and SA, models approach the accuracy of supervised <br>"
193
- "models at EVALITA (dashed line); in NER and REL they remain lower."
 
194
  ),
195
  xref="paper", yref="paper",
196
  x=0.5, y=-0.30,
@@ -203,12 +214,19 @@ def boxplot_per_task(dataframe=None, baselines=None):
203
 
204
  return fig
205
 
206
-
207
  BASELINES = {
208
  "TE":71.00, "SA": 66.38, "HS": 80.88, "AT": 82.40, "WIC": 85.00,
209
  "LS": 38.82, "SU": 38.91, "NER":88.00, "REL": 62.99
210
  }
211
 
 
 
 
 
 
 
 
212
 
213
  def boxplot_prompts_per_task(dataframe, tasks=None):
214
  if tasks is None:
@@ -690,7 +708,7 @@ with demo:
690
  # ⬇️ QUI aggiungiamo i grafici subito sotto la barra del titolo e sopra le tabs
691
  with gr.Row():
692
  gr.Plot(value=line_chart(LEADERBOARD_DF), elem_id="line-chart")
693
- gr.Plot(value=boxplot_per_task(LEADERBOARD_DF, BASELINES), elem_id="boxplot-task")
694
  #gr.Plot(value=boxplot_prompts_per_task(LEADERBOARD_DF), elem_id="boxplot-prompt-task")
695
 
696
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
 
118
  return fig
119
 
120
 
121
+ def boxplot_per_task(dataframe=None, baselines=None, references=None):
122
 
123
  #print(dataframe.columns)
124
 
 
176
  )
177
  '''
178
 
179
+ # reference GPT-4o
180
+ if task in references and references[task] is not None:
181
+ fig.add_shape(
182
+ type="line",
183
+ x0=i - 0.3, x1=i + 0.3,
184
+ y0=references[task], y1=references[task],
185
+ line=dict(color="red", width=2, dash="dashdot"),
186
+ xref="x", yref="y"
187
+ )
188
+
189
  fig.update_layout(
190
  title="Distribution of Model Accuracy by Task",
191
  xaxis_title="Task",
 
200
  fig.add_annotation(
201
  text=(
202
  "In tasks like TE and SA, models approach the accuracy of supervised <br>"
203
+ "models at EVALITA (dashed black line); in NER and REL they remain lower. <br>"
204
+ "Dashed red lines indicate GPT-4o results on generative tasks."
205
  ),
206
  xref="paper", yref="paper",
207
  x=0.5, y=-0.30,
 
214
 
215
  return fig
216
 
217
+ # EVALITA results
218
  BASELINES = {
219
  "TE":71.00, "SA": 66.38, "HS": 80.88, "AT": 82.40, "WIC": 85.00,
220
  "LS": 38.82, "SU": 38.91, "NER":88.00, "REL": 62.99
221
  }
222
 
223
+ # GPT-4o
224
+ REFERENCES = {
225
+ "NER": 79.11,
226
+ "REL": 63.32,
227
+ "LS": 59.25
228
+ }
229
+
230
 
231
  def boxplot_prompts_per_task(dataframe, tasks=None):
232
  if tasks is None:
 
708
  # ⬇️ QUI aggiungiamo i grafici subito sotto la barra del titolo e sopra le tabs
709
  with gr.Row():
710
  gr.Plot(value=line_chart(LEADERBOARD_DF), elem_id="line-chart")
711
+ gr.Plot(value=boxplot_per_task(LEADERBOARD_DF, BASELINES, REFERENCES), elem_id="boxplot-task")
712
  #gr.Plot(value=boxplot_prompts_per_task(LEADERBOARD_DF), elem_id="boxplot-prompt-task")
713
 
714
  with gr.Tabs(elem_classes="tab-buttons") as tabs: