Spaces:

alibayram
/

turkish_mmlu_leaderboard

Runtime error

App Files Files Community

alibayram committed on Nov 16, 2024

Commit

4ecae57

1 Parent(s): 8e219a6

Refactor Gradio app to enhance leaderboard functionality, improve model response search, and streamline model submission process

Browse files

Files changed (1) hide show

app.py +48 -69

app.py CHANGED Viewed

@@ -1,13 +1,17 @@
 import gradio as gr
 from apscheduler.schedulers.background import BackgroundScheduler
 from huggingface_hub import snapshot_download
-import pandas as pd
-import matplotlib.pyplot as plt
-# Dataset paths
 LEADERBOARD_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_liderlik_tablosu/data/train-00000-of-00001.parquet"
 RESPONSES_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_model_cevaplari/data/train-00000-of-00001.parquet"
 SECTION_RESULTS_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_bolum_sonuclari/data/train-00000-of-00001.parquet"
 # Load datasets
 try:
@@ -18,98 +22,73 @@ except Exception as e:
     print(f"Error loading datasets: {e}")
     raise
-# Helper functions
-def filter_leaderboard(family=None, quantization_level=None):
-    df = leaderboard_data.copy()
-    if family:
-        df = df[df["family"] == family]
-    if quantization_level:
-        df = df[df["quantization_level"] == quantization_level]
-    return df
-def search_responses(query, model):
-    filtered = model_responses_data[model_responses_data["bolum"].str.contains(query, case=False)]
-    selected_columns = ["bolum", "soru", "cevap", model + "_cevap"]
-    return filtered[selected_columns]
-def plot_section_results():
-    fig, ax = plt.subplots(figsize=(10, 6))
-    avg_scores = section_results_data.mean(numeric_only=True)
-    avg_scores.plot(kind="bar", ax=ax)
-    ax.set_title("Average Section-Wise Performance")
-    ax.set_ylabel("Accuracy (%)")
-    ax.set_xlabel("Sections")
-    return fig  # Return the figure object
-def add_new_model(model_name, base_model, revision, precision, weight_type, model_type):
-    # Simulated model submission logic
-    return f"Model '{model_name}' submitted successfully!"
-# Gradio app structure
-with gr.Blocks(css=".container { max-width: 1200px; margin: auto; }") as app:
     gr.HTML("<h1>🏆 Turkish MMLU Leaderboard</h1>")
     gr.Markdown("Explore, evaluate, and compare AI model performance.")
     with gr.Tabs() as tabs:
-        # Leaderboard Tab
         with gr.TabItem("Leaderboard"):
-            family_filter = gr.Dropdown(
-                choices=leaderboard_data["family"].unique().tolist(), label="Filter by Family", multiselect=False
-            )
-            quantization_filter = gr.Dropdown(
-                choices=leaderboard_data["quantization_level"].unique().tolist(), label="Filter by Quantization Level"
-            )
-            leaderboard_table = gr.DataFrame(leaderboard_data)
-            gr.Button("Apply Filters").click(
-                filter_leaderboard, inputs=[family_filter, quantization_filter], outputs=leaderboard_table
-            )
-        # Model Responses Tab
         with gr.TabItem("Model Responses"):
             model_dropdown = gr.Dropdown(
                 choices=leaderboard_data["model"].unique().tolist(), label="Select Model"
             )
-            query_input = gr.Textbox(label="Search Query")
             responses_table = gr.DataFrame()
             gr.Button("Search").click(
-                search_responses, inputs=[query_input, model_dropdown], outputs=responses_table
             )
-        # Section Results Tab
-        with gr.TabItem("Section Results"):
-            gr.Plot(plot_section_results)
-            gr.DataFrame(section_results_data)
-        # Submit Model Tab
         with gr.TabItem("Submit Model"):
             gr.Markdown("### Submit Your Model for Evaluation")
             model_name = gr.Textbox(label="Model Name")
-            base_model = gr.Textbox(label="Base Model")
             revision = gr.Textbox(label="Revision", placeholder="main")
             precision = gr.Dropdown(
                 choices=["float16", "int8", "bfloat16", "float32"], label="Precision", value="float16"
             )
-            weight_type = gr.Dropdown(
-                choices=["Original", "Delta", "Adapter"], label="Weight Type", value="Original"
-            )
-            model_type = gr.Dropdown(
-                choices=["Transformer", "RNN", "GPT", "Other"], label="Model Type", value="Transformer"
-            )
             submit_button = gr.Button("Submit")
-            submission_output = gr.Markdown()
             submit_button.click(
-                add_new_model,
-                inputs=[model_name, base_model, revision, precision, weight_type, model_type],
-                outputs=submission_output,
             )
-# Scheduler for refreshing datasets
 scheduler = BackgroundScheduler()
-scheduler.add_job(
-    lambda: snapshot_download(repo_id="alibayram", repo_type="dataset", local_dir="cache"),
-    "interval", seconds=1800
-)
 scheduler.start()
 # Launch app
-app.queue(concurrency_count=40).launch()

 import gradio as gr
+from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
+import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
 from huggingface_hub import snapshot_download
+# Define dataset paths and constants
 LEADERBOARD_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_liderlik_tablosu/data/train-00000-of-00001.parquet"
 RESPONSES_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_model_cevaplari/data/train-00000-of-00001.parquet"
 SECTION_RESULTS_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_bolum_sonuclari/data/train-00000-of-00001.parquet"
+# NOTE(review): REPO_ID is not referenced anywhere in the visible part of this file.
+REPO_ID = "alibayram"
+# NOTE(review): these repo ids carry no "owner/" namespace; confirm they resolve on the Hub.
+QUEUE_REPO = "queue-repo"
+RESULTS_REPO = "results-repo"
+# NOTE(review): looks like a placeholder committed in source. restart_space() passes it as
+# `token=` to snapshot_download, so a real token is presumably expected here; confirm, and
+# supply it from the Space's secrets rather than from the code.
+TOKEN = "your_hf_token"
 # Load datasets
 try:
     print(f"Error loading datasets: {e}")
     raise
+# Initialize leaderboard
+def init_leaderboard(dataframe):
+    """Build the ``Leaderboard`` component for the Leaderboard tab.
+
+    Args:
+        dataframe: pandas DataFrame holding the leaderboard rows. The component
+            is configured with the column names "model", "basari",
+            "toplam_sure", "family" and "quantization_level", so the frame is
+            assumed to contain them (TODO confirm against the dataset).
+
+    Returns:
+        A ``Leaderboard`` with column selection, search on "model", and
+        checkbox filters on "family" and "quantization_level".
+
+    Raises:
+        ValueError: if ``dataframe`` is None or has no rows.
+    """
+    if dataframe is None or dataframe.empty:
+        raise ValueError("Leaderboard DataFrame is empty or None.")
+    # Iterating `dataframe.dtypes` yields the dtype objects themselves, not
+    # Series, so `col.dtype` fails. Classify each dtype directly and translate
+    # it to a Gradio datatype label instead of passing raw pandas names such as
+    # "object" or "float64".
+    datatypes = []
+    for dtype in dataframe.dtypes:
+        # Bool must be tested first: pandas also reports bool as numeric.
+        if pd.api.types.is_bool_dtype(dtype):
+            datatypes.append("bool")
+        elif pd.api.types.is_numeric_dtype(dtype):
+            datatypes.append("number")
+        elif pd.api.types.is_datetime64_any_dtype(dtype):
+            datatypes.append("date")
+        else:
+            datatypes.append("str")
+    return Leaderboard(
+        value=dataframe,
+        datatype=datatypes,
+        select_columns=SelectColumns(
+            default_selection=["model", "basari", "toplam_sure"],
+            label="Select Columns to Display",
+        ),
+        search_columns=["model"],
+        filter_columns=[
+            ColumnFilter("family", type="checkboxgroup", label="Model Family"),
+            ColumnFilter("quantization_level", type="checkboxgroup", label="Quantization Level"),
+        ],
+    )
+# Refresh datasets
+def restart_space():
+    """Download snapshots of the queue and results dataset repos.
+
+    Each repo is fetched with ``snapshot_download`` into its own local
+    directory, passing ``TOKEN`` for authentication. Nothing in this function
+    restarts the Space itself, despite the name.
+    """
+    targets = (
+        (QUEUE_REPO, "queue_cache"),
+        (RESULTS_REPO, "results_cache"),
+    )
+    for repo, cache_dir in targets:
+        snapshot_download(repo_id=repo, local_dir=cache_dir, repo_type="dataset", token=TOKEN)
+# Gradio app setup
+demo = gr.Blocks(css=".container { max-width: 1200px; margin: auto; }")
+with demo:
     gr.HTML("<h1>🏆 Turkish MMLU Leaderboard</h1>")
     gr.Markdown("Explore, evaluate, and compare AI model performance.")
+    # Tabs for leaderboard, model responses, and submission
     with gr.Tabs() as tabs:
         with gr.TabItem("Leaderboard"):
+            gr.Markdown("### Explore Leaderboard")
+            leaderboard = init_leaderboard(leaderboard_data)
         with gr.TabItem("Model Responses"):
+            gr.Markdown("### Model Responses")
             model_dropdown = gr.Dropdown(
                 choices=leaderboard_data["model"].unique().tolist(), label="Select Model"
             )
+            query_input = gr.Textbox(label="Search Questions")
             responses_table = gr.DataFrame()
             gr.Button("Search").click(
+                # Match the query as literal text, case-insensitively, so input such as
+                # "(" or "c++" is not parsed as a regex. na=False drops rows whose
+                # "bolum" is missing instead of yielding a NaN mask that cannot be used
+                # for boolean indexing. `model` is received because the dropdown is
+                # wired as the first input, but the filter does not use it.
+                lambda model, query: model_responses_data[
+                    model_responses_data["bolum"].str.contains(
+                        query, case=False, na=False, regex=False
+                    )
+                ],
+                inputs=[model_dropdown, query_input],
+                outputs=responses_table,
             )
         with gr.TabItem("Submit Model"):
             gr.Markdown("### Submit Your Model for Evaluation")
             model_name = gr.Textbox(label="Model Name")
             revision = gr.Textbox(label="Revision", placeholder="main")
             precision = gr.Dropdown(
                 choices=["float16", "int8", "bfloat16", "float32"], label="Precision", value="float16"
             )
             submit_button = gr.Button("Submit")
+            submission_result = gr.Markdown()
             submit_button.click(
+                lambda name, rev, prec: f"Submitted {name} with revision {rev} and precision {prec}.",
+                inputs=[model_name, revision, precision],
+                outputs=submission_result,
             )
+# Scheduler for automatic updates
 scheduler = BackgroundScheduler()
+scheduler.add_job(restart_space, "interval", seconds=1800)
 scheduler.start()
 # Launch app
+demo.queue(max_size=40).launch()