Spaces:

alibayram
/

turkish_mmlu_leaderboard

Running

App Files Community

alibayram committed on Nov 16, 2024

Commit

a7fa922

1 Parent(s): 4ecae57

Enhance Gradio app with new filtering and plotting functionalities, improve model submission process, and restructure tabs for better user experience

Browse files

Files changed (1) hide show

app.py +69 -48

app.py CHANGED Viewed

@@ -1,17 +1,13 @@
 import gradio as gr
-from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
-import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
 from huggingface_hub import snapshot_download
-# Define dataset paths and constants
 LEADERBOARD_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_liderlik_tablosu/data/train-00000-of-00001.parquet"
 RESPONSES_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_model_cevaplari/data/train-00000-of-00001.parquet"
 SECTION_RESULTS_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_bolum_sonuclari/data/train-00000-of-00001.parquet"
-REPO_ID = "alibayram"
-QUEUE_REPO = "queue-repo"
-RESULTS_REPO = "results-repo"
-TOKEN = "your_hf_token"
 # Load datasets
 try:
@@ -22,73 +18,98 @@ except Exception as e:
     print(f"Error loading datasets: {e}")
     raise
-# Initialize leaderboard
-def init_leaderboard(dataframe):
-    if dataframe is None or dataframe.empty:
-        raise ValueError("Leaderboard DataFrame is empty or None.")
-    return Leaderboard(
-        value=dataframe,
-        datatype=[col.dtype.name for col in dataframe.dtypes],
-        select_columns=SelectColumns(
-            default_selection=["model", "basari", "toplam_sure"],
-            label="Select Columns to Display",
-        ),
-        search_columns=["model"],
-        filter_columns=[
-            ColumnFilter("family", type="checkboxgroup", label="Model Family"),
-            ColumnFilter("quantization_level", type="checkboxgroup", label="Quantization Level"),
-        ],
-    )
-# Refresh datasets
-def restart_space():
-    snapshot_download(repo_id=QUEUE_REPO, local_dir="queue_cache", repo_type="dataset", token=TOKEN)
-    snapshot_download(repo_id=RESULTS_REPO, local_dir="results_cache", repo_type="dataset", token=TOKEN)
-# Gradio app setup
-demo = gr.Blocks(css=".container { max-width: 1200px; margin: auto; }")
-with demo:
     gr.HTML("<h1>🏆 Turkish MMLU Leaderboard</h1>")
     gr.Markdown("Explore, evaluate, and compare AI model performance.")
-    # Tabs for leaderboard, model responses, and submission
     with gr.Tabs() as tabs:
         with gr.TabItem("Leaderboard"):
-            gr.Markdown("### Explore Leaderboard")
-            leaderboard = init_leaderboard(leaderboard_data)
         with gr.TabItem("Model Responses"):
-            gr.Markdown("### Model Responses")
             model_dropdown = gr.Dropdown(
                 choices=leaderboard_data["model"].unique().tolist(), label="Select Model"
             )
-            query_input = gr.Textbox(label="Search Questions")
             responses_table = gr.DataFrame()
             gr.Button("Search").click(
-                lambda model, query: model_responses_data[model_responses_data["bolum"].str.contains(query)],
-                inputs=[model_dropdown, query_input],
-                outputs=responses_table,
             )
         with gr.TabItem("Submit Model"):
             gr.Markdown("### Submit Your Model for Evaluation")
             model_name = gr.Textbox(label="Model Name")
             revision = gr.Textbox(label="Revision", placeholder="main")
             precision = gr.Dropdown(
                 choices=["float16", "int8", "bfloat16", "float32"], label="Precision", value="float16"
             )
             submit_button = gr.Button("Submit")
-            submission_result = gr.Markdown()
             submit_button.click(
-                lambda name, rev, prec: f"Submitted {name} with revision {rev} and precision {prec}.",
-                inputs=[model_name, revision, precision],
-                outputs=submission_result,
             )
-# Scheduler for automatic updates
 scheduler = BackgroundScheduler()
-scheduler.add_job(restart_space, "interval", seconds=1800)
 scheduler.start()
 # Launch app
-demo.queue(max_size=40).launch()

 import gradio as gr
 from apscheduler.schedulers.background import BackgroundScheduler
 from huggingface_hub import snapshot_download
+import pandas as pd
+import matplotlib.pyplot as plt
+# Dataset paths
 LEADERBOARD_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_liderlik_tablosu/data/train-00000-of-00001.parquet"
 RESPONSES_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_model_cevaplari/data/train-00000-of-00001.parquet"
 SECTION_RESULTS_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_bolum_sonuclari/data/train-00000-of-00001.parquet"
 # Load datasets
 try:
     print(f"Error loading datasets: {e}")
     raise
+# Helper functions
+def filter_leaderboard(family=None, quantization_level=None):
+    df = leaderboard_data.copy()
+    if family:
+        df = df[df["family"] == family]
+    if quantization_level:
+        df = df[df["quantization_level"] == quantization_level]
+    return df
+def search_responses(query, model):
+    filtered = model_responses_data[model_responses_data["bolum"].str.contains(query, case=False)]
+    selected_columns = ["bolum", "soru", "cevap", model + "_cevap"]
+    return filtered[selected_columns]
+def plot_section_results():
+    fig, ax = plt.subplots(figsize=(10, 6))
+    avg_scores = section_results_data.mean(numeric_only=True)
+    avg_scores.plot(kind="bar", ax=ax)
+    ax.set_title("Average Section-Wise Performance")
+    ax.set_ylabel("Accuracy (%)")
+    ax.set_xlabel("Sections")
+    return fig  # Return the figure object
+def add_new_model(model_name, base_model, revision, precision, weight_type, model_type):
+    # Simulated model submission logic
+    return f"Model '{model_name}' submitted successfully!"
+# Gradio app structure
+with gr.Blocks(css=".container { max-width: 1200px; margin: auto; }") as app:
     gr.HTML("<h1>🏆 Turkish MMLU Leaderboard</h1>")
     gr.Markdown("Explore, evaluate, and compare AI model performance.")
     with gr.Tabs() as tabs:
+        # Leaderboard Tab
         with gr.TabItem("Leaderboard"):
+            family_filter = gr.Dropdown(
+                choices=leaderboard_data["family"].unique().tolist(), label="Filter by Family", multiselect=False
+            )
+            quantization_filter = gr.Dropdown(
+                choices=leaderboard_data["quantization_level"].unique().tolist(), label="Filter by Quantization Level"
+            )
+            leaderboard_table = gr.DataFrame(leaderboard_data)
+            gr.Button("Apply Filters").click(
+                filter_leaderboard, inputs=[family_filter, quantization_filter], outputs=leaderboard_table
+            )
+        # Model Responses Tab
         with gr.TabItem("Model Responses"):
             model_dropdown = gr.Dropdown(
                 choices=leaderboard_data["model"].unique().tolist(), label="Select Model"
             )
+            query_input = gr.Textbox(label="Search Query")
             responses_table = gr.DataFrame()
             gr.Button("Search").click(
+                search_responses, inputs=[query_input, model_dropdown], outputs=responses_table
             )
+        # Section Results Tab
+        with gr.TabItem("Section Results"):
+            gr.Plot(plot_section_results)
+            gr.DataFrame(section_results_data)
+        # Submit Model Tab
         with gr.TabItem("Submit Model"):
             gr.Markdown("### Submit Your Model for Evaluation")
             model_name = gr.Textbox(label="Model Name")
+            base_model = gr.Textbox(label="Base Model")
             revision = gr.Textbox(label="Revision", placeholder="main")
             precision = gr.Dropdown(
                 choices=["float16", "int8", "bfloat16", "float32"], label="Precision", value="float16"
             )
+            weight_type = gr.Dropdown(
+                choices=["Original", "Delta", "Adapter"], label="Weight Type", value="Original"
+            )
+            model_type = gr.Dropdown(
+                choices=["Transformer", "RNN", "GPT", "Other"], label="Model Type", value="Transformer"
+            )
             submit_button = gr.Button("Submit")
+            submission_output = gr.Markdown()
             submit_button.click(
+                add_new_model,
+                inputs=[model_name, base_model, revision, precision, weight_type, model_type],
+                outputs=submission_output,
             )
+# Scheduler for refreshing datasets
 scheduler = BackgroundScheduler()
+scheduler.add_job(
+    lambda: snapshot_download(repo_id="alibayram", repo_type="dataset", local_dir="cache"),
+    "interval", seconds=1800
+)
 scheduler.start()
 # Launch app
+app.queue(default_concurrency_limit=40).launch()