Commit a25be15
Parent(s): 40758cf
Remove logic for 'All' and 'Aggregated' for Graph view.
Aggregated scores are computed per task. In the graph view, users select benchmarks rather than tasks, so the 'All' and 'Aggregated ⬆️' options no longer apply there.
- .gitattributes +1 -0
- app.py +19 -69
- logo.png +0 -0
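For context, the "Aggregated ⬆️" option removed from the graph view was built by averaging the per-benchmark 'Agg *' columns of the aggregate table. The sketch below condenses that deleted branch (see the removed lines in app.py further down); the helper name and the demo frame are illustrative and not part of the app:

import pandas as pd

def aggregated_metric(df_agg: pd.DataFrame) -> pd.DataFrame:
    # Condensed from the deleted branch: average every per-benchmark
    # 'Agg *' column into a single "Aggregated ⬆️" number per model.
    agg_columns = [col for col in df_agg.columns if col.startswith('Agg ')]
    out = df_agg.copy()
    out['Aggregated ⬆️'] = out[agg_columns].mean(axis=1).round(2)
    return out[['Model', 'Aggregated ⬆️']]

# Toy example (column names follow the removed code):
demo = pd.DataFrame({'Model': ['m1', 'm2'],
                     'Agg VerilogEval S2R': [55.0, 40.0],
                     'Agg RTLLM': [45.0, 50.0]})
print(aggregated_metric(demo))  # one cross-benchmark mean per model

Because that mean mixes benchmarks belonging to different tasks, it has no clean interpretation once the plot is scoped to a single benchmark, hence the removal.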
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 scale-hf-logo.png filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
app.py
CHANGED
@@ -76,64 +76,18 @@ def update_benchmarks_by_task(task):
     return gr.update(value=benchmark_value, choices=new_benchmarks), filtered
 
 def generate_scatter_plot(benchmark, metric):
-
-
-
-
-
-
-
-            models_in_bench = subset['Model'].unique()
-            models_data.extend([(model, bench) for model in models_in_bench])
-
-        all_models = list(set([m[0] for m in models_data]))
-        details = df[['Model', 'Params', 'Model Type']].drop_duplicates('Model')
-
-        if metric == "Aggregated ⬆️":
-            agg_columns = [col for col in df_agg.columns if col.startswith('Agg ')]
-            if agg_columns:
-                agg_data = df_agg.copy()
-                agg_data['Aggregated ⬆️'] = agg_data[agg_columns].mean(axis=1).round(2)
-                scatter_data = pd.merge(details, agg_data[['Model', 'Aggregated ⬆️']], on='Model', how='inner')
-            else:
-                scatter_data = details.copy()
-                scatter_data['Aggregated ⬆️'] = 50 # defaut
-        else:
-            scatter_data = details.copy()
-            metric_data = df[df['Metric'] == metric].groupby('Model')['Score'].mean().reset_index()
-            metric_data = metric_data.rename(columns={'Score': metric})
-            scatter_data = pd.merge(scatter_data, metric_data, on='Model', how='left')
-        scatter_data = scatter_data.dropna(subset=[metric] if metric in scatter_data.columns else ['Aggregated ⬆️'])
-
+    benchmark, metric = handle_special_cases(benchmark, metric)
+
+    subset = df[df['Benchmark'] == benchmark]
+    if benchmark == "RTL-Repo":
+        subset = subset[subset['Metric'].str.contains('EM', case=False, na=False)]
+        detailed_scores = subset.groupby('Model', as_index=False)['Score'].mean()
+        detailed_scores.rename(columns={'Score': 'Exact Matching (EM)'}, inplace=True)
     else:
-
-        benchmark, metric = handle_special_cases(benchmark, metric)
+        detailed_scores = subset.pivot_table(index='Model', columns='Metric', values='Score').reset_index()
 
-
-
-            subset = subset[subset['Metric'].str.contains('EM', case=False, na=False)]
-            detailed_scores = subset.groupby('Model', as_index=False)['Score'].mean()
-            detailed_scores.rename(columns={'Score': 'Exact Matching (EM)'}, inplace=True)
-            detailed_scores['Aggregated ⬆️'] = detailed_scores['Exact Matching (EM)']
-        else:
-            agg_column = None
-            detailed_scores = subset.pivot_table(index='Model', columns='Metric', values='Score').reset_index()
-            if benchmark == 'VerilogEval S2R':
-                agg_column = 'Agg VerilogEval S2R'
-            elif benchmark == 'VerilogEval MC':
-                agg_column = 'Agg VerilogEval MC'
-            elif benchmark == 'RTLLM':
-                agg_column = 'Agg RTLLM'
-            elif benchmark == 'VeriGen':
-                agg_column = 'Agg VeriGen'
-            if agg_column and agg_column in df_agg.columns:
-                agg_data = df_agg[['Model', agg_column]].rename(columns={agg_column: 'Aggregated ⬆️'})
-                detailed_scores = pd.merge(detailed_scores, agg_data, on='Model', how='left')
-            else:
-                detailed_scores['Aggregated ⬆️'] = detailed_scores[['Syntax (STX)', 'Functionality (FNC)', 'Synthesis (SYN)', 'Power', 'Performance', 'Area']].mean(axis=1).round(2)
-
-        details = df[['Model', 'Params', 'Model Type']].drop_duplicates('Model')
-        scatter_data = pd.merge(detailed_scores, details, on='Model', how='left').dropna(subset=['Params', metric])
+    details = df[['Model', 'Params', 'Model Type']].drop_duplicates('Model')
+    scatter_data = pd.merge(detailed_scores, details, on='Model', how='left').dropna(subset=['Params', metric])
 
     scatter_data['x'] = scatter_data['Params']
     scatter_data['y'] = scatter_data[metric]
@@ -144,8 +98,7 @@ def generate_scatter_plot(benchmark, metric):
 
     y_axis_limits = {
         'Functionality (FNC)': [5, 90], 'Syntax (STX)': [20, 100], 'Synthesis (SYN)': [5, 90],
-        'Power': [0, 50], 'Performance': [0, 50], 'Area': [0, 50], 'Exact Matching (EM)': [0, 50],
-        'Aggregated ⬆️': [0, 80]
+        'Power': [0, 50], 'Performance': [0, 50], 'Area': [0, 50], 'Exact Matching (EM)': [0, 50]
     }
     y_range = y_axis_limits.get(metric, [0, 80])
 
@@ -190,8 +143,8 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
     s2r_benchs = ["VerilogEval S2R", "RTLLM"]
     cc_benchs = ["VerilogEval MC", "VeriGen"]
     lc_benchs = ["RTL-Repo"]
+    non_rtl_metrics = ["Syntax (STX)", "Functionality (FNC)", "Synthesis (SYN)", "Power", "Performance", "Area"]
     rtl_metrics = ["Exact Matching (EM)"]
-    non_rtl_metrics = ["Syntax (STX)", "Functionality (FNC)", "Synthesis (SYN)", "Power", "Performance", "Area", "Aggregated ⬆️"]
     model_types = ['All', 'General', 'Coding', 'RTL-Specific']
 
     gr.HTML("""
@@ -263,10 +216,12 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
 
     with gr.Tab("Interactive Bubble Plot"):
         with gr.Row(equal_height=True):
-
-
+            default_benchmark = s2r_benchs[0]
+            bubble_benchmark = gr.Dropdown(choices=benchmarks, label="Select Benchmark", value=default_benchmark, elem_classes="gr-dropdown")
+            default_metric = non_rtl_metrics[0]
+            bubble_metric = gr.Dropdown(choices=non_rtl_metrics[:-1], label="Select Metric", value=default_metric)
         with gr.Row(equal_height=True):
-            scatter_plot = gr.Plot(value=generate_scatter_plot(
+            scatter_plot = gr.Plot(value=generate_scatter_plot(default_benchmark, default_metric), label="Bubble Chart", elem_id="full-width-plot")
 
     with gr.Tab("About Us"):
         gr.HTML(
@@ -315,18 +270,13 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
     search_box.change(fn=filter_leaderboard, inputs=[task_radio, benchmark_radio, model_type_dropdown, search_box, params_slider], outputs=leaderboard)
     params_slider.change(fn=filter_leaderboard, inputs=[task_radio, benchmark_radio, model_type_dropdown, search_box, params_slider], outputs=leaderboard)
 
-    # RTL-Repo Bubble plot
     def on_benchmark_change(benchmark, _):
         if benchmark == "RTL-Repo":
             metric = "Exact Matching (EM)"
             return gr.update(choices=rtl_metrics, value=metric), generate_scatter_plot(benchmark, metric)
         else:
-
-
-                return gr.update(choices=["Aggregated ⬆️"] + non_rtl_metrics[:-1], value=metric), generate_scatter_plot(benchmark, metric)
-            else:
-                metric = non_rtl_metrics[0]
-                return gr.update(choices=non_rtl_metrics[:-1], value=metric), generate_scatter_plot(benchmark, metric)
+            metric = non_rtl_metrics[0]
+            return gr.update(choices=non_rtl_metrics[:-1], value=metric), generate_scatter_plot(benchmark, metric)
 
     def on_metric_change(benchmark, metric):
         benchmark, metric = handle_special_cases(benchmark, metric)
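The event wiring for the new dropdowns is not part of the hunks above. Judging from the search_box.change(...) pattern visible in the diff, the hookup presumably resembles the sketch below; the exact inputs/outputs are an assumption, and the component names refer to the objects created in the diff:

# Assumed wiring (not shown in this commit's hunks), following the
# .change(...) pattern used for the leaderboard controls above.
bubble_benchmark.change(fn=on_benchmark_change,
                        inputs=[bubble_benchmark, bubble_metric],
                        outputs=[bubble_metric, scatter_plot])
bubble_metric.change(fn=on_metric_change,
                     inputs=[bubble_benchmark, bubble_metric],
                     outputs=scatter_plot)

Wired this way, changing the benchmark keeps the metric choices in sync (RTL-Repo only exposes Exact Matching) and redraws the bubble chart in place.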
logo.png
CHANGED
(binary image stored with Git LFS; no text diff)