ggcristian committed
Commit a25be15 · 1 Parent(s): 40758cf

Remove logic for 'All' and 'Aggregated' for Graph view.


Aggregated scores are computed per task. The graph view does not let users select tasks, only benchmarks, so the 'All' and 'Aggregated ⬆️' options no longer apply there.
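For reference, a minimal, self-contained sketch of the per-benchmark data path that remains after this change. A stub DataFrame stands in for the leaderboard data; the column names and the RTL-Repo special case mirror app.py, but the helper name `scatter_data_for` is illustrative (the real function is `generate_scatter_plot` and also builds the plot):

```python
import pandas as pd

# Stub of the leaderboard data; the real df in app.py has the same columns.
df = pd.DataFrame({
    'Model': ['A', 'A', 'B', 'B'],
    'Benchmark': ['RTLLM'] * 4,
    'Metric': ['Syntax (STX)', 'Functionality (FNC)'] * 2,
    'Score': [80.0, 55.0, 70.0, 60.0],
    'Params': [7, 7, 13, 13],
    'Model Type': ['General', 'General', 'Coding', 'Coding'],
})

def scatter_data_for(benchmark, metric):
    """Per-benchmark path only; no 'All' / 'Aggregated ⬆️' branch anymore."""
    subset = df[df['Benchmark'] == benchmark]
    if benchmark == "RTL-Repo":
        # RTL-Repo exposes a single Exact Matching (EM) score per model.
        subset = subset[subset['Metric'].str.contains('EM', case=False, na=False)]
        scores = subset.groupby('Model', as_index=False)['Score'].mean()
        scores = scores.rename(columns={'Score': 'Exact Matching (EM)'})
    else:
        # One row per model, one column per metric.
        scores = subset.pivot_table(index='Model', columns='Metric', values='Score').reset_index()
    details = df[['Model', 'Params', 'Model Type']].drop_duplicates('Model')
    return pd.merge(scores, details, on='Model', how='left').dropna(subset=['Params', metric])

print(scatter_data_for('RTLLM', 'Syntax (STX)'))
```

With the 'All' option gone, every point in the bubble plot comes from a single benchmark's scores, so no cross-benchmark aggregation is needed in the graph view.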

Files changed (3)
  1. .gitattributes +1 -0
  2. app.py +19 -69
  3. logo.png +0 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 scale-hf-logo.png filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -76,64 +76,18 @@ def update_benchmarks_by_task(task):
     return gr.update(value=benchmark_value, choices=new_benchmarks), filtered

 def generate_scatter_plot(benchmark, metric):
-    if benchmark == "All":
-        models_data = []
-
-        for bench in benchmarks:
-            subset = df[df['Benchmark'] == bench]
-            if bench == "RTL-Repo":
-                subset = subset[subset['Metric'].str.contains('EM', case=False, na=False)]
-            models_in_bench = subset['Model'].unique()
-            models_data.extend([(model, bench) for model in models_in_bench])
-
-        all_models = list(set([m[0] for m in models_data]))
-        details = df[['Model', 'Params', 'Model Type']].drop_duplicates('Model')
-
-        if metric == "Aggregated ⬆️":
-            agg_columns = [col for col in df_agg.columns if col.startswith('Agg ')]
-            if agg_columns:
-                agg_data = df_agg.copy()
-                agg_data['Aggregated ⬆️'] = agg_data[agg_columns].mean(axis=1).round(2)
-                scatter_data = pd.merge(details, agg_data[['Model', 'Aggregated ⬆️']], on='Model', how='inner')
-            else:
-                scatter_data = details.copy()
-                scatter_data['Aggregated ⬆️'] = 50 # defaut
-        else:
-            scatter_data = details.copy()
-            metric_data = df[df['Metric'] == metric].groupby('Model')['Score'].mean().reset_index()
-            metric_data = metric_data.rename(columns={'Score': metric})
-            scatter_data = pd.merge(scatter_data, metric_data, on='Model', how='left')
-            scatter_data = scatter_data.dropna(subset=[metric] if metric in scatter_data.columns else ['Aggregated ⬆️'])
-
-    else:
-        # Code we already had for individual benchmark selection
-        benchmark, metric = handle_special_cases(benchmark, metric)
+    benchmark, metric = handle_special_cases(benchmark, metric)
+
+    subset = df[df['Benchmark'] == benchmark]
+    if benchmark == "RTL-Repo":
+        subset = subset[subset['Metric'].str.contains('EM', case=False, na=False)]
+        detailed_scores = subset.groupby('Model', as_index=False)['Score'].mean()
+        detailed_scores.rename(columns={'Score': 'Exact Matching (EM)'}, inplace=True)
     else:
-        # Code we already had for individual benchmark selection
-        benchmark, metric = handle_special_cases(benchmark, metric)
+        detailed_scores = subset.pivot_table(index='Model', columns='Metric', values='Score').reset_index()

-        subset = df[df['Benchmark'] == benchmark]
-        if benchmark == "RTL-Repo":
-            subset = subset[subset['Metric'].str.contains('EM', case=False, na=False)]
-            detailed_scores = subset.groupby('Model', as_index=False)['Score'].mean()
-            detailed_scores.rename(columns={'Score': 'Exact Matching (EM)'}, inplace=True)
-            detailed_scores['Aggregated ⬆️'] = detailed_scores['Exact Matching (EM)']
-        else:
-            agg_column = None
-            detailed_scores = subset.pivot_table(index='Model', columns='Metric', values='Score').reset_index()
-            if benchmark == 'VerilogEval S2R':
-                agg_column = 'Agg VerilogEval S2R'
-            elif benchmark == 'VerilogEval MC':
-                agg_column = 'Agg VerilogEval MC'
-            elif benchmark == 'RTLLM':
-                agg_column = 'Agg RTLLM'
-            elif benchmark == 'VeriGen':
-                agg_column = 'Agg VeriGen'
-            if agg_column and agg_column in df_agg.columns:
-                agg_data = df_agg[['Model', agg_column]].rename(columns={agg_column: 'Aggregated ⬆️'})
-                detailed_scores = pd.merge(detailed_scores, agg_data, on='Model', how='left')
-            else:
-                detailed_scores['Aggregated ⬆️'] = detailed_scores[['Syntax (STX)', 'Functionality (FNC)', 'Synthesis (SYN)', 'Power', 'Performance', 'Area']].mean(axis=1).round(2)
-
-        details = df[['Model', 'Params', 'Model Type']].drop_duplicates('Model')
-        scatter_data = pd.merge(detailed_scores, details, on='Model', how='left').dropna(subset=['Params', metric])
+    details = df[['Model', 'Params', 'Model Type']].drop_duplicates('Model')
+    scatter_data = pd.merge(detailed_scores, details, on='Model', how='left').dropna(subset=['Params', metric])

     scatter_data['x'] = scatter_data['Params']
     scatter_data['y'] = scatter_data[metric]
@@ -144,8 +98,7 @@ def generate_scatter_plot(benchmark, metric):

     y_axis_limits = {
         'Functionality (FNC)': [5, 90], 'Syntax (STX)': [20, 100], 'Synthesis (SYN)': [5, 90],
-        'Power': [0, 50], 'Performance': [0, 50], 'Area': [0, 50], 'Exact Matching (EM)': [0, 50],
-        'Aggregated ⬆️': [0, 80]
+        'Power': [0, 50], 'Performance': [0, 50], 'Area': [0, 50], 'Exact Matching (EM)': [0, 50]
     }
     y_range = y_axis_limits.get(metric, [0, 80])

@@ -190,8 +143,8 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
     s2r_benchs = ["VerilogEval S2R", "RTLLM"]
     cc_benchs = ["VerilogEval MC", "VeriGen"]
     lc_benchs = ["RTL-Repo"]
+    non_rtl_metrics = ["Syntax (STX)", "Functionality (FNC)", "Synthesis (SYN)", "Power", "Performance", "Area"]
     rtl_metrics = ["Exact Matching (EM)"]
-    non_rtl_metrics = ["Syntax (STX)", "Functionality (FNC)", "Synthesis (SYN)", "Power", "Performance", "Area", "Aggregated ⬆️"]
     model_types = ['All', 'General', 'Coding', 'RTL-Specific']

     gr.HTML("""
@@ -263,10 +216,12 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c

     with gr.Tab("Interactive Bubble Plot"):
         with gr.Row(equal_height=True):
-            bubble_benchmark = gr.Dropdown(choices=["All"] + benchmarks, label="Select Benchmark", value='All', elem_classes="gr-dropdown")
-            bubble_metric = gr.Dropdown(choices=["Aggregated ⬆️"] + non_rtl_metrics[:-1], label="Select Metric", value="Aggregated ⬆️")
+            default_benchmark = s2r_benchs[0]
+            bubble_benchmark = gr.Dropdown(choices=benchmarks, label="Select Benchmark", value=default_benchmark, elem_classes="gr-dropdown")
+            default_metric = non_rtl_metrics[0]
+            bubble_metric = gr.Dropdown(choices=non_rtl_metrics[:-1], label="Select Metric", value=default_metric)
         with gr.Row(equal_height=True):
-            scatter_plot = gr.Plot(value=generate_scatter_plot('All', "Aggregated ⬆️"), label="Bubble Chart", elem_id="full-width-plot")
+            scatter_plot = gr.Plot(value=generate_scatter_plot(default_benchmark, default_metric), label="Bubble Chart", elem_id="full-width-plot")

     with gr.Tab("About Us"):
         gr.HTML(
@@ -315,18 +270,13 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
     search_box.change(fn=filter_leaderboard, inputs=[task_radio, benchmark_radio, model_type_dropdown, search_box, params_slider], outputs=leaderboard)
     params_slider.change(fn=filter_leaderboard, inputs=[task_radio, benchmark_radio, model_type_dropdown, search_box, params_slider], outputs=leaderboard)

-    # RTL-Repo Bubble plot
     def on_benchmark_change(benchmark, _):
         if benchmark == "RTL-Repo":
             metric = "Exact Matching (EM)"
             return gr.update(choices=rtl_metrics, value=metric), generate_scatter_plot(benchmark, metric)
         else:
-            if benchmark == "All":
-                metric = "Aggregated ⬆️" # default to Aggregated
-                return gr.update(choices=["Aggregated ⬆️"] + non_rtl_metrics[:-1], value=metric), generate_scatter_plot(benchmark, metric)
-            else:
-                metric = non_rtl_metrics[0]
-                return gr.update(choices=non_rtl_metrics[:-1], value=metric), generate_scatter_plot(benchmark, metric)
+            metric = non_rtl_metrics[0]
+            return gr.update(choices=non_rtl_metrics[:-1], value=metric), generate_scatter_plot(benchmark, metric)

     def on_metric_change(benchmark, metric):
         benchmark, metric = handle_special_cases(benchmark, metric)
logo.png CHANGED

Git LFS Details

  • SHA256: f35b346cfe8c29b4c34d1fc73558e3e34c294da19e28b99aceb7407efa6945e5
  • Pointer size: 130 Bytes
  • Size of remote file: 33.9 kB