Submit + FAQ
Files changed:
- app.py (+21, -3)
- src/about.py (+22, -3)
- src/display/css_html_js.py (+6, -0)
app.py
CHANGED

@@ -189,7 +189,7 @@ def get_model_info_blocks(chosen_model_name):
     with gr.Row():
         benchmark_score = gr.HTML(get_metric_html("Benchmark Score").format(filtered_df["Benchmark Score"][0]))
         rank = gr.HTML(get_metric_html("Benchmark Rank").format(filtered_df["Rank"][0]))
-        speed = gr.HTML(get_metric_html("Speed").format(filtered_df["Speed (words/sec)"][0]))
+        speed = gr.HTML(get_metric_html("Speed <br/>(words per second)").format(filtered_df["Speed (words/sec)"][0]))
         contamination = gr.HTML(get_metric_html("Contamination Score").format(filtered_df["Contamination Score"][0]))
         size = gr.HTML(get_metric_html("Size Category").format(filtered_df["Category"][0]))
 
@@ -318,12 +318,30 @@ with demo:
         with gr.TabItem("🚀 Submit here", elem_id="llm-benchmark-tab-submit", id=5):
             with gr.Row():
                 gr.Markdown("# Submit your model", elem_classes="markdown-text")
+            with gr.Column():
+                gr.Markdown("### Please confirm that you understand and accept the conditions below before submitting your model.")
+                prereqs_checkboxes = gr.CheckboxGroup(["I have successfully run the ABB benchmark script on my model using my own infrastructure, and I should NOT use the leaderboard for testing purposes",
+                                                       "I understand that my account/org has only one submission per month",
+                                                       "I understand that I can't submit models of more than 15B parameters (learn more in the FAQ)",
+                                                       "I understand that submitting contaminated models, or models meant to test the contamination score, will lead to action from our side, including banning and negative PR"],
+                                                      label=None, info=None,
+                                                      elem_classes="submit_prereq_checkboxes_container",
+                                                      container=False)
+
+
 
             with gr.Row():
                 with gr.Column():
                     model_name_textbox = gr.Textbox(label="Model name")
 
-            submit_button = gr.Button("Submit Eval", variant="huggingface")
+            submit_button = gr.Button("Submit Eval", variant="huggingface", interactive=False)
+
+            prereqs_checkboxes.change(
+                fn=lambda choices: gr.update(interactive=len(choices) == 4),
+                inputs=prereqs_checkboxes,
+                outputs=submit_button
+            )
+
             submission_result = gr.Markdown()
             submit_button.click(
                 add_new_eval,
@@ -372,7 +390,7 @@ with demo:
                     row_count=5,
                 )
 
-        with gr.TabItem("📝
+        with gr.TabItem("📝 FAQ", elem_id="llm-benchmark-tab-about", id=6):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
     with gr.Row():
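For reference, the gating logic added above can be reproduced in isolation. The sketch below is a minimal standalone example of the same pattern, where the condition strings and the dummy `add_new_eval` are illustrative placeholders rather than the Space's actual code:

```python
import gradio as gr

# Illustrative placeholders; the real Space defines its own conditions and add_new_eval.
CONDITIONS = ["Condition A", "Condition B", "Condition C", "Condition D"]

def add_new_eval(model_name):
    return f"Received submission for: {model_name}"

with gr.Blocks() as demo:
    prereqs = gr.CheckboxGroup(CONDITIONS, label="Submission conditions")
    model_name = gr.Textbox(label="Model name")
    # The button starts disabled and is enabled only once every condition is ticked.
    submit = gr.Button("Submit Eval", interactive=False)
    result = gr.Markdown()

    prereqs.change(
        fn=lambda choices: gr.update(interactive=len(choices) == len(CONDITIONS)),
        inputs=prereqs,
        outputs=submit,
    )
    submit.click(add_new_eval, inputs=model_name, outputs=result)

demo.launch()
```

Comparing the number of ticked choices against the full list keeps the check in one place if conditions are later added or removed; the diff above hard-codes the count as 4.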
src/about.py
CHANGED

@@ -67,10 +67,29 @@ Find more details in the about Tab.
 
 # Which evaluations are you running? how can people reproduce what you have?
 LLM_BENCHMARKS_TEXT = f"""
-##
+## What is the difference between ABL and ABB?
+
+ABL is the leaderboard, which uses the ABB benchmarking dataset and code in the backend to produce the results you see here.
+
+
+## Where can I learn more about ABL and ABB?
+
+Feel free to read the following resources:
+ABB Page:
+ABL blog post:
+
+## How can I reproduce the results?
+
+You can easily run the ABB benchmarking code using the following command on Google Colab or your own infrastructure.
+
+## What is the Benchmark Score?
+
+## What is the Contamination Score?
+
+## What is the Speed?
+
+## Why am I not allowed to submit models of more than 15B parameters?
 
-## Reproducibility
-To reproduce our results, here are the commands you can run:
 
 """
 
src/display/css_html_js.py
CHANGED

@@ -143,6 +143,12 @@ border-radius: 10px;
 margin: auto;
 width: 80%;
 }
+
+
+.submit_prereq_checkboxes_container div[data-testid=checkbox-group]{
+    display: flex;
+    flex-direction: column !important;
+}
 """
 
 get_window_url_params = """
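The new rule only takes effect because the component opts into that class via elem_classes and the Space's CSS string is passed to gr.Blocks. Below is a minimal sketch of that wiring, assuming the usual custom_css plumbing; the exact variable names in the Space may differ:

```python
import gradio as gr

# Assumed wiring: the Space builds one CSS string (src/display/css_html_js.py) and
# hands it to gr.Blocks(css=...); the selector then matches any component that
# carries the submit_prereq_checkboxes_container class.
custom_css = """
.submit_prereq_checkboxes_container div[data-testid=checkbox-group]{
    display: flex;
    flex-direction: column !important;
}
"""

with gr.Blocks(css=custom_css) as demo:
    gr.CheckboxGroup(
        ["Option 1", "Option 2"],
        elem_classes="submit_prereq_checkboxes_container",  # ties the component to the CSS rule
        container=False,
    )

demo.launch()
```

Without the elem_classes hook the rule would match nothing, and the checkboxes would keep Gradio's default inline layout.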