Submit + FAQ
Files changed:
- app.py (+21, -3)
- src/about.py (+22, -3)
- src/display/css_html_js.py (+6, -0)
app.py
CHANGED

@@ -189,7 +189,7 @@ def get_model_info_blocks(chosen_model_name):
     with gr.Row():
         benchmark_score = gr.HTML(get_metric_html("Benchmark Score").format(filtered_df["Benchmark Score"][0]))
         rank = gr.HTML(get_metric_html("Benchmark Rank").format(filtered_df["Rank"][0]))
-        speed = gr.HTML(get_metric_html("Speed").format(filtered_df["Speed (words/sec)"][0]))
+        speed = gr.HTML(get_metric_html("Speed <br/>(words per second)").format(filtered_df["Speed (words/sec)"][0]))
         contamination = gr.HTML(get_metric_html("Contamination Score").format(filtered_df["Contamination Score"][0]))
         size = gr.HTML(get_metric_html("Size Category").format(filtered_df["Category"][0]))
 
@@ -318,12 +318,30 @@ with demo:
         with gr.TabItem("🚀 Submit here", elem_id="llm-benchmark-tab-submit", id=5):
             with gr.Row():
                 gr.Markdown("# Submit your model", elem_classes="markdown-text")
+            with gr.Column():
+                gr.Markdown("### Please confirm that you understand and accept the conditions below before submitting your model.")
+                prereqs_checkboxes = gr.CheckboxGroup(["I have successfully run the ABB benchmark script on my model using my own infrastructure, and I should NOT use the leaderboard for testing purposes",
+                                                       "I understand that my account/org has only one submission per month",
+                                                       "I understand that I can't submit models of more than 15B parameters (learn more in the FAQ)",
+                                                       "I understand that submitting contaminated models, or models meant to test the contamination score, will lead to action from our side, including banning and negative PR"],
+                                                      label=None, info=None,
+                                                      elem_classes="submit_prereq_checkboxes_container",
+                                                      container=False)
+
+
 
             with gr.Row():
                 with gr.Column():
                     model_name_textbox = gr.Textbox(label="Model name")
 
-            submit_button = gr.Button("Submit Eval", variant="huggingface")
+            submit_button = gr.Button("Submit Eval", variant="huggingface", interactive=False)
+
+            prereqs_checkboxes.change(
+                fn=lambda choices: gr.update(interactive=len(choices) == 4),
+                inputs=prereqs_checkboxes,
+                outputs=submit_button
+            )
+
             submission_result = gr.Markdown()
             submit_button.click(
                 add_new_eval,
@@ -372,7 +390,7 @@ with demo:
                     row_count=5,
                 )
 
-        with gr.TabItem("📝
+        with gr.TabItem("📝 FAQ", elem_id="llm-benchmark-tab-about", id=6):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
     with gr.Row():
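For reference, the gating logic added above can be reproduced in isolation. The sketch below is a minimal standalone example of the same pattern, where the condition strings and the dummy `add_new_eval` are illustrative placeholders rather than the Space's actual code:

```python
import gradio as gr

# Illustrative placeholders; the real Space defines its own conditions and add_new_eval.
CONDITIONS = ["Condition A", "Condition B", "Condition C", "Condition D"]

def add_new_eval(model_name):
    return f"Received submission for: {model_name}"

with gr.Blocks() as demo:
    prereqs = gr.CheckboxGroup(CONDITIONS, label="Submission conditions")
    model_name = gr.Textbox(label="Model name")
    # The button starts disabled and is enabled only once every condition is ticked.
    submit = gr.Button("Submit Eval", interactive=False)
    result = gr.Markdown()

    prereqs.change(
        fn=lambda choices: gr.update(interactive=len(choices) == len(CONDITIONS)),
        inputs=prereqs,
        outputs=submit,
    )
    submit.click(add_new_eval, inputs=model_name, outputs=result)

demo.launch()
```

Comparing the number of ticked choices against the full list keeps the check in one place if conditions are later added or removed; the diff above hard-codes the count as 4.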
src/about.py
CHANGED

@@ -67,10 +67,29 @@ Find more details in the about Tab.
 
 # Which evaluations are you running? how can people reproduce what you have?
 LLM_BENCHMARKS_TEXT = f"""
-##
+## What is the difference between ABL and ABB?
+
+ABL is the leaderboard, which uses the ABB benchmarking dataset and code in the backend to produce the results you see here.
+
+
+## Where can I learn more about ABL and ABB?
+
+Feel free to read the following resources:
+ABB Page:
+ABL blog post:
+
+## How can I reproduce the results?
+
+You can easily run the ABB benchmarking code using the following command on Google Colab or your own infrastructure.
+
+## What is the Benchmark Score?
+
+## What is the Contamination Score?
+
+## What is the Speed?
+
+## Why am I not allowed to submit models of more than 15B parameters?
 
-## Reproducibility
-To reproduce our results, here are the commands you can run:
 
 """
 
src/display/css_html_js.py
CHANGED

@@ -143,6 +143,12 @@ border-radius: 10px;
 margin: auto;
 width: 80%;
 }
+
+
+.submit_prereq_checkboxes_container div[data-testid=checkbox-group]{
+    display: flex;
+    flex-direction: column !important;
+}
 """
 
 get_window_url_params = """
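The new rule only takes effect because the component opts into that class via elem_classes and the Space's CSS string is passed to gr.Blocks. Below is a minimal sketch of that wiring, assuming the usual custom_css plumbing; the exact variable names in the Space may differ:

```python
import gradio as gr

# Assumed wiring: the Space builds one CSS string (src/display/css_html_js.py) and
# hands it to gr.Blocks(css=...); the selector then matches any component that
# carries the submit_prereq_checkboxes_container class.
custom_css = """
.submit_prereq_checkboxes_container div[data-testid=checkbox-group]{
    display: flex;
    flex-direction: column !important;
}
"""

with gr.Blocks(css=custom_css) as demo:
    gr.CheckboxGroup(
        ["Option 1", "Option 2"],
        elem_classes="submit_prereq_checkboxes_container",  # ties the component to the CSS rule
        container=False,
    )

demo.launch()
```

Without the elem_classes hook the rule would match nothing, and the checkboxes would keep Gradio's default inline layout.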