Commit a25be15
Parent(s): 40758cf
Remove logic for 'All' and 'Aggregated' for Graph view.
Aggregated scores are computed per task. In the graph view, users select benchmarks rather than tasks, so the 'All' and 'Aggregated ⬆️' options no longer apply there.
- .gitattributes +1 -0
- app.py +19 -69
- logo.png +0 -0
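For context, the "Aggregated ⬆️" option removed from the graph view was built by averaging the per-benchmark 'Agg *' columns of the aggregate table. The sketch below condenses that deleted branch (see the removed lines in app.py further down); the helper name and the demo frame are illustrative and not part of the app:

import pandas as pd

def aggregated_metric(df_agg: pd.DataFrame) -> pd.DataFrame:
    # Condensed from the deleted branch: average every per-benchmark
    # 'Agg *' column into a single "Aggregated ⬆️" number per model.
    agg_columns = [col for col in df_agg.columns if col.startswith('Agg ')]
    out = df_agg.copy()
    out['Aggregated ⬆️'] = out[agg_columns].mean(axis=1).round(2)
    return out[['Model', 'Aggregated ⬆️']]

# Toy example (column names follow the removed code):
demo = pd.DataFrame({'Model': ['m1', 'm2'],
                     'Agg VerilogEval S2R': [55.0, 40.0],
                     'Agg RTLLM': [45.0, 50.0]})
print(aggregated_metric(demo))  # one cross-benchmark mean per model

Because that mean mixes benchmarks belonging to different tasks, it has no clean interpretation once the plot is scoped to a single benchmark, hence the removal.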
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 scale-hf-logo.png filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
app.py
CHANGED
@@ -76,64 +76,18 @@ def update_benchmarks_by_task(task):
     return gr.update(value=benchmark_value, choices=new_benchmarks), filtered
 
 def generate_scatter_plot(benchmark, metric):
-
-
-
-
-
-
-
-            models_in_bench = subset['Model'].unique()
-            models_data.extend([(model, bench) for model in models_in_bench])
-
-        all_models = list(set([m[0] for m in models_data]))
-        details = df[['Model', 'Params', 'Model Type']].drop_duplicates('Model')
-
-        if metric == "Aggregated ⬆️":
-            agg_columns = [col for col in df_agg.columns if col.startswith('Agg ')]
-            if agg_columns:
-                agg_data = df_agg.copy()
-                agg_data['Aggregated ⬆️'] = agg_data[agg_columns].mean(axis=1).round(2)
-                scatter_data = pd.merge(details, agg_data[['Model', 'Aggregated ⬆️']], on='Model', how='inner')
-            else:
-                scatter_data = details.copy()
-                scatter_data['Aggregated ⬆️'] = 50 # defaut
-        else:
-            scatter_data = details.copy()
-            metric_data = df[df['Metric'] == metric].groupby('Model')['Score'].mean().reset_index()
-            metric_data = metric_data.rename(columns={'Score': metric})
-            scatter_data = pd.merge(scatter_data, metric_data, on='Model', how='left')
-        scatter_data = scatter_data.dropna(subset=[metric] if metric in scatter_data.columns else ['Aggregated ⬆️'])
-
+    benchmark, metric = handle_special_cases(benchmark, metric)
+
+    subset = df[df['Benchmark'] == benchmark]
+    if benchmark == "RTL-Repo":
+        subset = subset[subset['Metric'].str.contains('EM', case=False, na=False)]
+        detailed_scores = subset.groupby('Model', as_index=False)['Score'].mean()
+        detailed_scores.rename(columns={'Score': 'Exact Matching (EM)'}, inplace=True)
     else:
-
-        benchmark, metric = handle_special_cases(benchmark, metric)
+        detailed_scores = subset.pivot_table(index='Model', columns='Metric', values='Score').reset_index()
 
-
-
-            subset = subset[subset['Metric'].str.contains('EM', case=False, na=False)]
-            detailed_scores = subset.groupby('Model', as_index=False)['Score'].mean()
-            detailed_scores.rename(columns={'Score': 'Exact Matching (EM)'}, inplace=True)
-            detailed_scores['Aggregated ⬆️'] = detailed_scores['Exact Matching (EM)']
-        else:
-            agg_column = None
-            detailed_scores = subset.pivot_table(index='Model', columns='Metric', values='Score').reset_index()
-            if benchmark == 'VerilogEval S2R':
-                agg_column = 'Agg VerilogEval S2R'
-            elif benchmark == 'VerilogEval MC':
-                agg_column = 'Agg VerilogEval MC'
-            elif benchmark == 'RTLLM':
-                agg_column = 'Agg RTLLM'
-            elif benchmark == 'VeriGen':
-                agg_column = 'Agg VeriGen'
-            if agg_column and agg_column in df_agg.columns:
-                agg_data = df_agg[['Model', agg_column]].rename(columns={agg_column: 'Aggregated ⬆️'})
-                detailed_scores = pd.merge(detailed_scores, agg_data, on='Model', how='left')
-            else:
-                detailed_scores['Aggregated ⬆️'] = detailed_scores[['Syntax (STX)', 'Functionality (FNC)', 'Synthesis (SYN)', 'Power', 'Performance', 'Area']].mean(axis=1).round(2)
-
-        details = df[['Model', 'Params', 'Model Type']].drop_duplicates('Model')
-        scatter_data = pd.merge(detailed_scores, details, on='Model', how='left').dropna(subset=['Params', metric])
+    details = df[['Model', 'Params', 'Model Type']].drop_duplicates('Model')
+    scatter_data = pd.merge(detailed_scores, details, on='Model', how='left').dropna(subset=['Params', metric])
 
     scatter_data['x'] = scatter_data['Params']
     scatter_data['y'] = scatter_data[metric]
@@ -144,8 +98,7 @@ def generate_scatter_plot(benchmark, metric):
 
     y_axis_limits = {
         'Functionality (FNC)': [5, 90], 'Syntax (STX)': [20, 100], 'Synthesis (SYN)': [5, 90],
-        'Power': [0, 50], 'Performance': [0, 50], 'Area': [0, 50], 'Exact Matching (EM)': [0, 50],
-        'Aggregated ⬆️': [0, 80]
+        'Power': [0, 50], 'Performance': [0, 50], 'Area': [0, 50], 'Exact Matching (EM)': [0, 50]
     }
     y_range = y_axis_limits.get(metric, [0, 80])
 
@@ -190,8 +143,8 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
     s2r_benchs = ["VerilogEval S2R", "RTLLM"]
     cc_benchs = ["VerilogEval MC", "VeriGen"]
     lc_benchs = ["RTL-Repo"]
+    non_rtl_metrics = ["Syntax (STX)", "Functionality (FNC)", "Synthesis (SYN)", "Power", "Performance", "Area"]
     rtl_metrics = ["Exact Matching (EM)"]
-    non_rtl_metrics = ["Syntax (STX)", "Functionality (FNC)", "Synthesis (SYN)", "Power", "Performance", "Area", "Aggregated ⬆️"]
     model_types = ['All', 'General', 'Coding', 'RTL-Specific']
 
     gr.HTML("""
@@ -263,10 +216,12 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
 
     with gr.Tab("Interactive Bubble Plot"):
         with gr.Row(equal_height=True):
-
-
+            default_benchmark = s2r_benchs[0]
+            bubble_benchmark = gr.Dropdown(choices=benchmarks, label="Select Benchmark", value=default_benchmark, elem_classes="gr-dropdown")
+            default_metric = non_rtl_metrics[0]
+            bubble_metric = gr.Dropdown(choices=non_rtl_metrics[:-1], label="Select Metric", value=default_metric)
         with gr.Row(equal_height=True):
-            scatter_plot = gr.Plot(value=generate_scatter_plot(
+            scatter_plot = gr.Plot(value=generate_scatter_plot(default_benchmark, default_metric), label="Bubble Chart", elem_id="full-width-plot")
 
     with gr.Tab("About Us"):
         gr.HTML(
@@ -315,18 +270,13 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
     search_box.change(fn=filter_leaderboard, inputs=[task_radio, benchmark_radio, model_type_dropdown, search_box, params_slider], outputs=leaderboard)
     params_slider.change(fn=filter_leaderboard, inputs=[task_radio, benchmark_radio, model_type_dropdown, search_box, params_slider], outputs=leaderboard)
 
-    # RTL-Repo Bubble plot
     def on_benchmark_change(benchmark, _):
         if benchmark == "RTL-Repo":
             metric = "Exact Matching (EM)"
             return gr.update(choices=rtl_metrics, value=metric), generate_scatter_plot(benchmark, metric)
         else:
-
-
-                return gr.update(choices=["Aggregated ⬆️"] + non_rtl_metrics[:-1], value=metric), generate_scatter_plot(benchmark, metric)
-            else:
-                metric = non_rtl_metrics[0]
-                return gr.update(choices=non_rtl_metrics[:-1], value=metric), generate_scatter_plot(benchmark, metric)
+            metric = non_rtl_metrics[0]
+            return gr.update(choices=non_rtl_metrics[:-1], value=metric), generate_scatter_plot(benchmark, metric)
 
     def on_metric_change(benchmark, metric):
         benchmark, metric = handle_special_cases(benchmark, metric)
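The event wiring for the new dropdowns is not part of the hunks above. Judging from the search_box.change(...) pattern visible in the diff, the hookup presumably resembles the sketch below; the exact inputs/outputs are an assumption, and the component names refer to the objects created in the diff:

# Assumed wiring (not shown in this commit's hunks), following the
# .change(...) pattern used for the leaderboard controls above.
bubble_benchmark.change(fn=on_benchmark_change,
                        inputs=[bubble_benchmark, bubble_metric],
                        outputs=[bubble_metric, scatter_plot])
bubble_metric.change(fn=on_metric_change,
                     inputs=[bubble_benchmark, bubble_metric],
                     outputs=scatter_plot)

Wired this way, changing the benchmark keeps the metric choices in sync (RTL-Repo only exposes Exact Matching) and redraws the bubble chart in place.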
logo.png
CHANGED
(binary image stored with Git LFS; no text diff)