Commit aaf0c71
Parent: e70391f
New Leaderboard Update
- We can now filter by task, then by benchmark within that task (see the sketch after the file list)
- We display the Aggregated Scores instead of the Average Scores

Files changed:
- aggregated_scores.csv +22 -0
- app.py +91 -34
- css_html_js.py +4 -1
- parse.py +11 -2
- utils.py +32 -12
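In practice the new cascade works like this: picking a task narrows the benchmark choices to that task's benchmarks, and "All" then aggregates only within the selected task. A minimal sketch of that flow follows, assuming a toy dataframe; only the benchmark names and the Benchmark column come from this commit, everything else is illustrative.

import pandas as pd

s2r_benchs = ["VerilogEval S2R", "RTLLM"]              # Spec-to-RTL benchmarks
cc_benchs = ["VerilogEval MC", "VeriGen", "RTL-Repo"]  # Code Completion benchmarks

# Toy stand-in for the leaderboard dataframe.
df = pd.DataFrame({
    "Model": ["A", "A", "B"],
    "Benchmark": ["VerilogEval S2R", "VeriGen", "RTLLM"],
    "Score": [77.0, 54.4, 68.1],
})

def benchmarks_for(task: str) -> list[str]:
    # Selecting a task narrows the benchmark set, mirroring
    # update_benchmarks_by_task in app.py.
    return s2r_benchs if task == "Spec-to-RTL" else cc_benchs

subset = df[df["Benchmark"].isin(benchmarks_for("Spec-to-RTL"))]
print(subset)  # only the VerilogEval S2R and RTLLM rows remain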
aggregated_scores.csv ADDED
@@ -0,0 +1,22 @@
+Model,Agg S2R,Agg MC,Agg VerilogEval S2R,Agg VerilogEval MC,Agg RTLLM,Agg VeriGen
+DeepSeek R1,74.84,75.51,77.01,77.81,68.06,54.4
+Llama 3.1 405B,49.72,42.8,53.98,42.92,36.43,41.67
+Llama 3.(1-3) 70B,39.0,38.49,38.64,37.45,40.12,48.05
+Qwen2.5 72B,49.23,48.82,49.17,51.22,49.45,26.75
+Qwen2.5 32B,50.58,40.73,50.53,41.85,50.71,30.46
+StarChat2 15B v0.1,39.04,38.9,37.45,37.69,44.0,49.99
+DeepSeek R1 Distill Qwen 14B,22.98,23.61,23.21,23.47,22.27,24.91
+CodeLlama 70B,31.46,31.29,34.17,29.8,22.99,44.96
+QwenCoder 2.5 32B,42.53,43.71,42.27,43.96,43.33,41.4
+DeepSeek Coder 33B,25.71,36.47,19.49,37.25,45.11,29.29
+QwenCoder 2.5 14B,36.75,38.49,35.61,39.03,40.33,33.55
+OpenCoder 8B,31.13,34.76,27.12,34.55,43.63,36.67
+QwenCoder 2.5 7B,13.86,32.31,6.31,31.75,37.41,37.47
+"DeepSeek Coder 6,7B",31.6,30.03,28.69,30.41,40.67,26.61
+RTLCoder Mistral,21.86,27.2,22.73,26.21,19.15,36.3
+RTLCoder DeepSeek,32.21,37.6,31.75,37.47,33.64,38.81
+OriGen,37.22,41.29,46.0,41.97,9.82,35.07
+HaVen-CodeQwen,41.66,46.09,42.97,46.57,37.55,41.74
+CodeV-CL-7B,28.19,35.7,25.75,35.39,35.79,38.53
+CodeV-QW-7B,20.79,47.26,18.73,50.28,27.23,19.55
+CodeV-DS-6.7B,18.19,44.1,14.28,47.05,30.39,17.03
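The new file is a flat table: one row per model, one "Agg" column per task (Agg S2R, Agg MC) and one per benchmark. A quick sketch of inspecting it with pandas; the nlargest call is illustrative, not part of the Space.

import pandas as pd

df_agg = pd.read_csv("aggregated_scores.csv")
# Top models by the Spec-to-RTL aggregate:
print(df_agg.nlargest(3, "Agg S2R")[["Model", "Agg S2R"]])
# DeepSeek R1 (74.84), Qwen2.5 32B (50.58), Llama 3.1 405B (49.72)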
app.py CHANGED
@@ -3,8 +3,8 @@ import pandas as pd
 import gradio as gr
 from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
 from css_html_js import custom_css, trigger_plot
-from parse import read_json, read_data
-from utils import model_hyperlink, filter_RTLRepo, filter_bench, filter_bench_all, handle_special_cases
+from parse import read_json, read_data, parse_agg
+from utils import model_hyperlink, filter_RTLRepo, filter_bench, filter_bench_all, handle_special_cases, type_emoji
 from typing import Union
 from about import CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT
 import numpy as np
@@ -12,12 +12,26 @@ import plotly.graph_objects as go
 import plotly.express as px
 from gradio.themes.utils import colors
 
-def filter_leaderboard(benchmark, model_type, search_query, max_params):
+def filter_leaderboard(task, benchmark, model_type, search_query, max_params):
     subset = df.copy()
+
+    # Filter by task-specific benchmarks when 'All' benchmarks is selected
+    if task == "Spec-to-RTL":
+        valid_benchmarks = s2r_benchs
+        if benchmark == 'All':
+            subset = subset[subset['Benchmark'].isin(valid_benchmarks)]
+    elif task == "Code Completion":
+        valid_benchmarks = cc_benchs
+        if benchmark == 'All':
+            subset = subset[subset['Benchmark'].isin(valid_benchmarks)]
+
+    # Apply benchmark filter if not "All"
     if benchmark != 'All':
         subset = df[df['Benchmark'] == benchmark]
+
+    # Apply other filters
     if model_type != 'All':
-
+        # For dropdown without emojis
         subset = subset[subset['Model Type'] == model_type]
     if search_query:
         subset = subset[subset['Model'].str.contains(search_query, case=False, na=False)]
@@ -25,11 +39,33 @@ def filter_leaderboard(benchmark, model_type, search_query, max_params):
     subset = subset[subset['Params'] <= max_params]
 
     if benchmark == 'All':
-        return filter_bench_all(subset)
+        if task == 'Spec-to-RTL':
+            return filter_bench_all(subset, df_agg, agg_column='Agg S2R')
+        elif task == 'Code Completion':
+            return filter_bench_all(subset, df_agg, agg_column='Agg MC')
     elif benchmark == 'RTL-Repo':
         return filter_RTLRepo(subset)
     else:
-        return filter_bench(subset)
+        # Pass the specific benchmark aggregate column
+        agg_column = None
+        if benchmark == 'VerilogEval S2R':
+            agg_column = 'Agg VerilogEval S2R'
+        elif benchmark == 'VerilogEval MC':
+            agg_column = 'Agg VerilogEval MC'
+        elif benchmark == 'RTLLM':
+            agg_column = 'Agg RTLLM'
+        elif benchmark == 'VeriGen':
+            agg_column = 'Agg VeriGen'
+
+        return filter_bench(subset, df_agg, agg_column)
+
+def update_benchmarks_by_task(task):
+    if task == "Spec-to-RTL":
+        return gr.update(choices=["All"] + s2r_benchs, value="All")
+    elif task == "Code Completion":
+        return gr.update(choices=["All"] + cc_benchs, value="All")
+    else:
+        return gr.update(choices=["All"] + benchmarks, value="All")
 
 def generate_scatter_plot(benchmark, metric):
     benchmark, metric = handle_special_cases(benchmark, metric)
@@ -50,7 +86,7 @@ def generate_scatter_plot(benchmark, metric):
     scatter_data['x'] = scatter_data['Params']
     scatter_data['y'] = scatter_data[metric]
     scatter_data['size'] = (scatter_data['x'] ** 0.3) * 40
-
+
     type_colors = {"General": "green", "Coding": "yellow", "RTL-Specific": "blue"}
     scatter_data['color'] = scatter_data['Model Type'].map(type_colors).fillna('gray')
 
@@ -65,7 +101,10 @@ def generate_scatter_plot(benchmark, metric):
         scatter_data, x='x', y='y', log_x=True, size='size', color='Model Type', text='Model',
         hover_data={metric: ':.2f'}, title=f'Params vs. {metric} for {benchmark}',
         labels={'x': '# Params (Log Scale)', 'y': metric}, template="plotly_white",
-        # color_discrete_map={"General": "
+        # color_discrete_map={"General": "
+        #A8D5BA", "Coding": "
+        #F7DC6F", "RTL-Specific": "
+        #87CEFA"},
         height=600, width=1200
     )
 
@@ -98,9 +137,14 @@ function refresh() {
 
 with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=colors.emerald)) as app:
     df, benchmarks, metrics, default_metric = read_data()
+    df_agg = parse_agg("./aggregated_scores.csv")
+    tasks = ["Spec-to-RTL", "Code Completion"]
+    s2r_benchs = ["VerilogEval S2R", "RTLLM"]
+    cc_benchs = ["VerilogEval MC", "VeriGen", "RTL-Repo"]
     rtl_metrics = ["Exact Matching (EM)"]
-    non_rtl_metrics = ["Syntax (STX)", "Functionality (FNC)", "Synthesis (SYN)", "Power", "Performance", "Area"]
-
+    non_rtl_metrics = ["Syntax (STX)", "Functionality (FNC)", "Synthesis (SYN)", "Power", "Performance", "Area", "Aggregated ⬆️"]
+    model_types = ['All', 'General', 'Coding', 'RTL-Specific']
+
     gr.HTML("""
         <p align="center" style="margin-bottom: -10px;">
             <img src='/gradio_api/file=logo.png' alt='TuRTLe Logo' width='220'/> <br/>
@@ -135,22 +179,32 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
     """)
     with gr.Tabs():
        with gr.Tab("Leaderboard"):
-            with gr.Row():
-
-
+            with gr.Row(equal_height=True):
+                with gr.Column(scale=1):
+                    task_radio = gr.Radio(choices=tasks, label="Select Task", value='Spec-to-RTL')
+                with gr.Column(scale=1.75):
+                    benchmark_radio = gr.Radio(choices=["All"] + s2r_benchs, label="Select Benchmark", value='All')
 
-            with gr.Row():
-
-
-
-
-
-
-
-
+            with gr.Row(equal_height=True):
+                with gr.Column(scale=1.9):
+                    search_box = gr.Textbox(label="Search Model", placeholder="Type model name...")
+                with gr.Column(scale=1):
+                    model_type_dropdown = gr.Dropdown(
+                        choices=model_types,
+                        label="Select Model Type",
+                        value='All'
+                    )
+                with gr.Column(scale=2):
+                    params_slider = gr.Slider(
+                        minimum=df['Params'].min(),
+                        maximum=700,
+                        value=700,
+                        label="Max Params",
+                        step=1
+                    )
 
            leaderboard = gr.DataFrame(
-                value=filter_leaderboard('
+                value=filter_leaderboard('Spec-to-RTL', 'All', 'All', "", 700),
                 headers="first row",
                 show_row_numbers=True,
                 wrap=True,
@@ -159,9 +213,9 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
                 column_widths=["7%", "25%", "10%", "17%", "6%", "6%", "6%", "6%", "6%", "7%"]),
 
        with gr.Tab("Interactive Bubble Plot"):
-            with gr.Row():
+            with gr.Row(equal_height=True):
                 bubble_benchmark = gr.Radio(choices=benchmarks, label="Select Benchmark", value='VerilogEval S2R')
-                bubble_metric = gr.Radio(choices=non_rtl_metrics, label="Select Metric", value="Syntax (STX)")
+                bubble_metric = gr.Radio(choices=non_rtl_metrics[:-1], label="Select Metric", value="Syntax (STX)")
            scatter_plot = gr.Plot(value=generate_scatter_plot('VerilogEval S2R', default_metric), label="Bubble Chart", elem_id="full-width-plot")
 
        with gr.Tab("About Us"):
@@ -202,22 +256,25 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
     )
 
     # event handlers, ugly way but it works
-
-
-
-
+    task_radio.change(
+        fn=update_benchmarks_by_task,
+        inputs=[task_radio],
+        outputs=[benchmark_radio]
+    )
+    task_radio.change(fn=filter_leaderboard, inputs=[task_radio, benchmark_radio, model_type_dropdown, search_box, params_slider], outputs=leaderboard)
+    benchmark_radio.change(fn=filter_leaderboard, inputs=[task_radio, benchmark_radio, model_type_dropdown, search_box, params_slider], outputs=leaderboard)
+    model_type_dropdown.change(fn=filter_leaderboard, inputs=[task_radio, benchmark_radio, model_type_dropdown, search_box, params_slider], outputs=leaderboard)
+    search_box.change(fn=filter_leaderboard, inputs=[task_radio, benchmark_radio, model_type_dropdown, search_box, params_slider], outputs=leaderboard)
+    params_slider.change(fn=filter_leaderboard, inputs=[task_radio, benchmark_radio, model_type_dropdown, search_box, params_slider], outputs=leaderboard)
 
-    # RTL-Repo Bubble plot
+    # RTL-Repo Bubble plot
    def on_benchmark_change(benchmark, _):
        if benchmark == "RTL-Repo":
            metric = "Exact Matching (EM)"
            return gr.update(choices=rtl_metrics, value=metric), generate_scatter_plot(benchmark, metric)
        else:
            metric = non_rtl_metrics[0] # default to Syntax
-            return gr.update(choices=non_rtl_metrics, value=metric), generate_scatter_plot(benchmark, metric)
-        # benchmark, metric = handle_special_cases(benchmark, metric)
-        # fig = generate_scatter_plot(benchmark, metric)
-        # return gr.update(value=metric), fig
+            return gr.update(choices=non_rtl_metrics[:-1], value=metric), generate_scatter_plot(benchmark, metric)
 
    def on_metric_change(benchmark, metric):
        benchmark, metric = handle_special_cases(benchmark, metric)
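The wiring above registers two .change callbacks on the task radio: the first swaps the benchmark choices, the second re-filters the table with the current control values. A condensed, runnable sketch of that pattern; swap_choices and refresh are toy stand-ins for update_benchmarks_by_task and filter_leaderboard.

import gradio as gr

TASKS = ["Spec-to-RTL", "Code Completion"]
S2R = ["All", "VerilogEval S2R", "RTLLM"]
CC = ["All", "VerilogEval MC", "VeriGen", "RTL-Repo"]

def swap_choices(task):
    # Mirrors update_benchmarks_by_task: reset choices and value together.
    return gr.update(choices=S2R if task == "Spec-to-RTL" else CC, value="All")

def refresh(task, benchmark):
    # Stand-in for filter_leaderboard: return fresh table contents.
    return [[task, benchmark]]

with gr.Blocks() as demo:
    task = gr.Radio(TASKS, label="Select Task", value="Spec-to-RTL")
    bench = gr.Radio(S2R, label="Select Benchmark", value="All")
    table = gr.DataFrame(value=[["Spec-to-RTL", "All"]], headers=["Task", "Benchmark"])
    # Two handlers on one control: first narrow the choices, then refilter.
    task.change(fn=swap_choices, inputs=[task], outputs=[bench])
    task.change(fn=refresh, inputs=[task, bench], outputs=[table])

Registering two handlers on the same radio keeps the benchmark reset and the table refresh independent, which is the "ugly way but it works" approach the comment in app.py admits to.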
css_html_js.py CHANGED
@@ -106,7 +106,10 @@ custom_css = """
    padding:0.5;
 }
 #box-filter > .form{
-    border: 0
+    border: 0;
+}
+.slider_input_container {
+    padding-top: 8px;
 }
 """
 
parse.py CHANGED
@@ -30,14 +30,16 @@ model_details = {
     "OriGen": ("https://huggingface.co/henryen/OriGen_Fix", 6.74, "RTL-Specific")
 }
 
-def get_headers(reader) -> Union[list, list]:
+def get_headers(reader, agg=False) -> Union[list, list]:
     metrics, benchs = [], []
     for i, row in enumerate(reader):
         if i == 0:
             metrics = row[1:]
-        elif i == 1:
+        elif i == 1 and not agg:
             benchs = row[1:]
             break
+        else:
+            return metrics
     return metrics, benchs
 
 def get_model_params_and_url(model) -> Union[str, str, float]:
@@ -80,6 +82,13 @@ def parse_results(csv_path: str) -> list[dict]:
     print(models)
     return dataset
 
+def parse_agg(csv_path: str) -> list[dict]:
+    """
+    Each row has the following format:
+    MODEL | BENCHMARK | TASK | METRIC | RESULT
+    """
+    return pd.read_csv("aggregated_scores.csv")
+
 def writeJson(data: list):
     with open('results.json', 'w') as f:
         json.dump(data, f, indent=4, ensure_ascii=False)
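A usage sketch for the new parser. Note two quirks of the version as committed: parse_agg ignores its csv_path argument and always reads aggregated_scores.csv from the working directory, and it returns a pandas DataFrame despite the list[dict] annotation (the docstring also describes a row format that does not match the wide CSV added above).

# Hypothetical usage; mirrors how app.py calls parse_agg.
from parse import parse_agg

df_agg = parse_agg("./aggregated_scores.csv")  # path argument is ignored as committed
print(df_agg[["Model", "Agg S2R", "Agg MC"]].head())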
utils.py CHANGED
@@ -28,30 +28,51 @@ def filter_RTLRepo(subset: pd.DataFrame) -> pd.DataFrame:
     filtered_df['Type'] = filtered_df['Model Type'].map(lambda x: type_emoji.get(x, ""))
     filtered_df = filtered_df[['Type', 'Model', 'Params', 'Exact Matching (EM)']]
     filtered_df = filtered_df.sort_values(by='Exact Matching (EM)', ascending=False).reset_index(drop=True)
-    # filtered_df.insert(0, '', range(1, len(filtered_df) + 1))
     return filtered_df
 
-def filter_bench(subset: pd.DataFrame) -> pd.DataFrame:
+def filter_bench(subset: pd.DataFrame, df_agg=None, agg_column=None) -> pd.DataFrame:
     details = subset[['Model', 'Model URL', 'Model Type', 'Params']].drop_duplicates('Model')
     pivot_df = subset.pivot_table(index='Model', columns='Metric', values='Score', aggfunc='mean').reset_index()
-
+
+    if df_agg is not None and agg_column is not None and agg_column in df_agg.columns:
+        agg_data = df_agg[['Model', agg_column]].rename(columns={agg_column: 'Aggregated ⬆️'})
+        pivot_df = pd.merge(pivot_df, agg_data, on='Model', how='left')
+    else:# fallback
+        pivot_df['Aggregated ⬆️'] = pivot_df.mean(axis=1, numeric_only=True).round(2)
+
     pivot_df = pd.merge(pivot_df, details, on='Model', how='left')
     pivot_df['Model'] = pivot_df.apply(lambda row: model_hyperlink(row["Model URL"], row["Model"]), axis=1)
     pivot_df['Type'] = pivot_df['Model Type'].map(lambda x: type_emoji.get(x, ""))
     pivot_df.rename(columns={'Syntax (STX)': 'STX', 'Functionality (FNC)': 'FNC', 'Synthesis (SYN)': 'SYN', 'Performance': 'Perf'}, inplace=True)
-
+
+    columns_order = ['Type', 'Model', 'Params', 'Aggregated ⬆️', 'STX', 'FNC', 'SYN', 'Power', 'Perf', 'Area']
     pivot_df = pivot_df[[col for col in columns_order if col in pivot_df.columns]]
-    pivot_df = pivot_df.sort_values(by='
-    # pivot_df.insert(0, '', range(1, len(pivot_df) + 1))
+    pivot_df = pivot_df.sort_values(by='Aggregated ⬆️', ascending=False).reset_index(drop=True)
     return pivot_df
 
-def filter_bench_all(subset: pd.DataFrame) -> pd.DataFrame:
+def filter_bench_all(subset: pd.DataFrame, df_agg=None, agg_column=None) -> pd.DataFrame:
     details = subset[['Model', 'Model URL', 'Model Type', 'Params']].drop_duplicates('Model')
     pivot_df = subset.pivot_table(index='Model', columns='Metric', values='Score', aggfunc='mean').reset_index().round(2)
-
+
+    if df_agg is not None:
+        if agg_column is not None and agg_column in df_agg.columns:
+            agg_data = df_agg[['Model', agg_column]].rename(columns={agg_column: 'Aggregated ⬆️'})
+            pivot_df = pd.merge(pivot_df, agg_data, on='Model', how='left')
+        else:
+            agg_columns = [col for col in df_agg.columns if col.startswith('Agg ')]
+            if agg_columns:
+                df_agg['Average_Agg'] = df_agg[agg_columns].mean(axis=1)
+                agg_data = df_agg[['Model', 'Average_Agg']].rename(columns={'Average_Agg': 'Aggregated ⬆️'})
+                pivot_df = pd.merge(pivot_df, agg_data, on='Model', how='left')
+            else: # fallback
+                pivot_df['Aggregated ⬆️'] = pivot_df.mean(axis=1, numeric_only=True).round(2)
+    else: # fallback
+        pivot_df['Aggregated ⬆️'] = pivot_df.mean(axis=1, numeric_only=True).round(2)
+
     pivot_df = pd.merge(pivot_df, details, on='Model', how='left')
     pivot_df['Model'] = pivot_df.apply(lambda row: model_hyperlink(row["Model URL"], row["Model"]), axis=1)
     pivot_df['Type'] = pivot_df['Model Type'].map(lambda x: type_emoji.get(x, ""))
+
     pivot_df.rename(columns={
         'Exact Matching (EM)': 'EM',
         'Syntax (STX)': 'Avg STX',
@@ -61,9 +82,8 @@ def filter_bench_all(subset: pd.DataFrame) -> pd.DataFrame:
         'Performance': 'Avg Perf',
         'Area': 'Avg Area',
     }, inplace=True)
-
-    columns_order = ['Type', 'Model', 'Params', '
+
+    columns_order = ['Type', 'Model', 'Params', 'Aggregated ⬆️', 'Avg STX', 'Avg FNC', 'Avg SYN', 'Avg Power', 'Avg Perf', 'Avg Area']
     pivot_df = pivot_df[[col for col in columns_order if col in pivot_df.columns]]
-    pivot_df = pivot_df.sort_values(by='
-    # pivot_df.insert(0, '', range(1, len(pivot_df) + 1))
+    pivot_df = pivot_df.sort_values(by='Aggregated ⬆️', ascending=False).reset_index(drop=True)
     return pivot_df
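When no aggregate table (or no matching "Agg" column) is available, both filter functions fall back to a plain row mean over the pivoted metric columns. A toy illustration of that fallback; pivot_df here is hand-built, whereas the real one comes from pivot_table.

import pandas as pd

pivot_df = pd.DataFrame({
    "Model": ["A", "B"],
    "STX": [90.0, 80.0],
    "FNC": [70.0, 60.0],
})
# Same expression as the fallback branch in filter_bench / filter_bench_all.
pivot_df["Aggregated ⬆️"] = pivot_df.mean(axis=1, numeric_only=True).round(2)
print(pivot_df.sort_values(by="Aggregated ⬆️", ascending=False))
#   Model   STX   FNC  Aggregated ⬆️
#       A  90.0  70.0           80.0
#       B  80.0  60.0           70.0

In the committed code this fallback runs before the details merge, so Params never enters the average; only the pivoted metric scores do.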