Commit 21a7ca2 · Parent: 9dc61b0
Add MLCAD 2025 citation
Files changed:
- app.py (+6 -6)
- static/about.py (+7 -5)
app.py CHANGED

@@ -214,7 +214,7 @@ with gr.Blocks(
 
 <a href="http://arxiv.org/abs/2504.01986" target="_blank" style="text-decoration: none; margin-right: 10px;">
 <button style="background: #b31b1b; color: white; padding: 10px 14px; border-radius: 8px; border: none; font-size: 16px; cursor: pointer;">
-arXiv
+arXiv MLCAD 2025
 </button>
 </a>
 

@@ -235,7 +235,8 @@ with gr.Blocks(
 <p style="margin-bottom: 15px; text-align: start !important;">Welcome to the TuRTLe Model Leaderboard! TuRTLe is a <b>unified evaluation framework designed to systematically assess Large Language Models (LLMs) in RTL (Register-Transfer Level) generation</b> for hardware design.
 Evaluation criteria include <b>syntax correctness, functional accuracy, synthesizability, and post-synthesis quality</b> (PPA: Power, Performance, Area). TuRTLe integrates multiple benchmarks to highlight strengths and weaknesses of available LLMs.
 Use the filters below to explore different RTL benchmarks, simulators and models.</p>
-<p style="margin-top:10px; text-align:start !important;"> <span style="font-variant:small-caps; font-weight:bold;">UPDATE (JULY 2025)</span>:
+<p style="margin-top:10px; text-align:start !important;"> <span style="font-variant:small-caps; font-weight:bold;">UPDATE (JULY 2025)</span>: Our TuRTLe paper has been accepted to <a href="https://mlcad.org/symposium/2025/" target="_blank"><b>MLCAD 2025</b></a> which will be held in September in Santa Cruz, California!</p>
+<p style="margin-top: -6px; text-align:start !important;"> <span style="font-variant:small-caps; font-weight:bold;">UPDATE (JULY 2025)</span>: Verilator has been added as an additional simulator alongside Icarus Verilog. You can now filter and compare results by simulator</p>
 <p style="margin-top: -6px; text-align: start !important; "><span style="font-variant: small-caps; font-weight: bold;">UPDATE (JUNE 2025)</span>: We make our framework open-source on GitHub and we add 7 new recent models! For a total of 40 base and instruct models and 5 RTL benchmarks</p>
 </div>
 """

@@ -371,15 +372,14 @@ with gr.Blocks(
 <div style="max-width: 800px; margin: auto; padding: 20px; border: 1px solid #ccc; border-radius: 10px;">
 <ul style="font-size: 16px; margin-bottom: 20px; margin-top: 20px;">
 <li><a href="https://github.com/bigcode-project/bigcode-evaluation-harness" target="_blank">Code Generation LM Evaluation Harness</a></li>
+<li>Williams, S. Icarus Verilog [Computer software]. <a href="https://github.com/steveicarus/iverilog" target="_blank">https://github.com/steveicarus/iverilog</a></li>
+<li>Snyder, W., Wasson, P., Galbi, D., & et al. Verilator [Computer software]. <a href="https://github.com/verilator/verilator" target="_blank">https://github.com/verilator/verilator</a></li>
 <li>RTL-Repo: Allam and M. Shalan, “Rtl-repo: A benchmark for evaluating llms on large-scale rtl design projects,” in 2024 IEEE LLM Aided Design Workshop (LAD). IEEE, 2024, pp. 1–5.</li>
 <li>VeriGen: S. Thakur, B. Ahmad, H. Pearce, B. Tan, B. Dolan-Gavitt, R. Karri, and S. Garg, “Verigen: A large language model for verilog code generation,” ACM Transactions on Design Automation of Electronic Systems, vol. 29, no. 3, pp. 1–31, 2024.</li>
 <li>VerilogEval (I): M. Liu, N. Pinckney, B. Khailany, and H. Ren, “Verilogeval: Evaluating large language models for verilog code generation,” in 2023 IEEE/ACM International Conference on Computer Aided Design (ICCAD). IEEE, 2023, pp. 1–8.</li>
 <li>VerilogEval (II): N. Pinckney, C. Batten, M. Liu, H. Ren, and B. Khailany, “Revisiting VerilogEval: A Year of Improvements in Large-Language Models for Hardware Code Generation,” ACM Trans. Des. Autom. Electron. Syst., feb 2025. https://doi.org/10.1145/3718088</li>
 <li>RTLLM: Y. Lu, S. Liu, Q. Zhang, and Z. Xie, “Rtllm: An open-source benchmark for design rtl generation with large language model,” in 2024 29th Asia and South Pacific Design Automation Conference (ASP-DAC). IEEE, 2024, pp. 722–727.</li>
 </ul>
-<p style="font-size: 16px; margin-top: 15px;">
-Feel free to contact us:
-</p>
 </div>
 """
 )

@@ -388,7 +388,7 @@ with gr.Blocks(
 citation_button = gr.Textbox(
 value=CITATION_BUTTON_TEXT,
 label=CITATION_BUTTON_LABEL,
-lines=
+lines=14,
 elem_id="citation-button",
 show_copy_button=True,
 )
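For context, the lines=14 change only affects how tall the citation box renders. Below is a minimal standalone sketch of that widget, assuming a recent Gradio release and assuming the two constants are importable from static/about.py as the repo layout suggests; it is not the full app.

import gradio as gr

from static.about import CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT

with gr.Blocks() as demo:
    # Same arguments as the updated call in app.py; lines=14 keeps the
    # whole 12-line BibTeX entry visible without scrolling.
    citation_button = gr.Textbox(
        value=CITATION_BUTTON_TEXT,
        label=CITATION_BUTTON_LABEL,
        lines=14,
        elem_id="citation-button",
        show_copy_button=True,  # one-click copy of the snippet
    )

if __name__ == "__main__":
    demo.launch()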
static/about.py CHANGED

@@ -1,10 +1,12 @@
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
-CITATION_BUTTON_TEXT = r"""@
+CITATION_BUTTON_TEXT = r"""@inproceedings{garciagasulla2025turtleunifiedevaluationllms,
 title={TuRTLe: A Unified Evaluation of LLMs for RTL Generation},
 author={Dario Garcia-Gasulla and Gokcen Kestor and Emanuele Parisi and Miquel Albert\'i-Binimelis and Cristian Gutierrez and Razine Moundir Ghorab and Orlando Montenegro and Bernat Homs and Miquel Moreto},
+booktitle = {Proceedings of the 2025 ACM/IEEE International Symposium on Machine Learning for CAD},
+series = {MLCAD '25},
 year={2025},
-
-
-
+publisher = {Association for Computing Machinery},
+address = {New York, NY, USA},
+location = {Santa Cruz, CA, USA},
 url={https://arxiv.org/abs/2504.01986},
-}"""
+}"""
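One way to sanity-check that the new @inproceedings entry is well-formed BibTeX is to round-trip it through a parser. A sketch, assuming the third-party bibtexparser 1.x package (not a dependency of the Space itself):

import bibtexparser  # pip install bibtexparser

from static.about import CITATION_BUTTON_TEXT

# Parse the raw citation string and inspect the resulting entry.
db = bibtexparser.loads(CITATION_BUTTON_TEXT)
assert len(db.entries) == 1, "expected exactly one BibTeX entry"

entry = db.entries[0]
print(entry["ENTRYTYPE"])  # inproceedings
print(entry["ID"])         # garciagasulla2025turtleunifiedevaluationllms
print(entry["booktitle"])  # Proceedings of the 2025 ACM/IEEE International Symposium ...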