import gradio as gr
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import snapshot_download
import pandas as pd
import matplotlib.pyplot as plt
# Dataset paths
LEADERBOARD_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_liderlik_tablosu/data/train-00000-of-00001.parquet"
RESPONSES_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_model_cevaplari/data/train-00000-of-00001.parquet"
SECTION_RESULTS_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_bolum_sonuclari/data/train-00000-of-00001.parquet"
# Load datasets
try:
leaderboard_data = pd.read_parquet(LEADERBOARD_PATH)
model_responses_data = pd.read_parquet(RESPONSES_PATH)
section_results_data = pd.read_parquet(SECTION_RESULTS_PATH)
except Exception as e:
print(f"Error loading datasets: {e}")
raise
# Helper functions
def filter_leaderboard(family=None, quantization_level=None):
    # Return the leaderboard, optionally restricted to one model family and/or quantization level.
    df = leaderboard_data.copy()
if family:
df = df[df["family"] == family]
if quantization_level:
df = df[df["quantization_level"] == quantization_level]
return df
def search_responses(query, model):
    # Show the question, reference answer, and the selected model's answer for sections matching the query.
    filtered = model_responses_data[model_responses_data["bolum"].str.contains(query, case=False, na=False)]
    selected_columns = ["bolum", "soru", "cevap", model + "_cevap"]
    return filtered[selected_columns]
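# Usage sketch (hypothetical model name): search_responses("Tarih", "gpt-4o") keeps rows whose
# "bolum" contains "Tarih" and selects the "gpt-4o_cevap" column; the model argument is assumed
# to match the "<model>_cevap" column prefix used in the responses dataset.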
def plot_section_results():
    fig, ax = plt.subplots(figsize=(10, 6))
    # Mean of each numeric column in the section-results table.
    avg_scores = section_results_data.mean(numeric_only=True)
avg_scores.plot(kind="bar", ax=ax)
ax.set_title("Average Section-Wise Performance")
ax.set_ylabel("Accuracy (%)")
ax.set_xlabel("Sections")
return fig # Return the figure object
def add_new_model(model_name, base_model, revision, precision, weight_type, model_type):
# Simulated model submission logic
return f"Model '{model_name}' submitted successfully!"
# Gradio app structure
with gr.Blocks(css=".container { max-width: 1200px; margin: auto; }") as app:
gr.HTML("<h1>πŸ† Turkish MMLU Leaderboard</h1>")
gr.Markdown("Explore, evaluate, and compare AI model performance.")
with gr.Tabs() as tabs:
# Leaderboard Tab
with gr.TabItem("Leaderboard"):
family_filter = gr.Dropdown(
choices=leaderboard_data["family"].unique().tolist(), label="Filter by Family", multiselect=False
)
quantization_filter = gr.Dropdown(
choices=leaderboard_data["quantization_level"].unique().tolist(), label="Filter by Quantization Level"
)
leaderboard_table = gr.DataFrame(leaderboard_data)
gr.Button("Apply Filters").click(
filter_leaderboard, inputs=[family_filter, quantization_filter], outputs=leaderboard_table
)
# Model Responses Tab
with gr.TabItem("Model Responses"):
model_dropdown = gr.Dropdown(
choices=leaderboard_data["model"].unique().tolist(), label="Select Model"
)
query_input = gr.Textbox(label="Search Query")
responses_table = gr.DataFrame()
gr.Button("Search").click(
search_responses, inputs=[query_input, model_dropdown], outputs=responses_table
)
# Section Results Tab
with gr.TabItem("Section Results"):
            # gr.Plot accepts a callable value; Gradio calls it to produce the initial figure.
            gr.Plot(plot_section_results)
gr.DataFrame(section_results_data)
# Submit Model Tab
with gr.TabItem("Submit Model"):
gr.Markdown("### Submit Your Model for Evaluation")
model_name = gr.Textbox(label="Model Name")
base_model = gr.Textbox(label="Base Model")
revision = gr.Textbox(label="Revision", placeholder="main")
precision = gr.Dropdown(
choices=["float16", "int8", "bfloat16", "float32"], label="Precision", value="float16"
)
weight_type = gr.Dropdown(
choices=["Original", "Delta", "Adapter"], label="Weight Type", value="Original"
)
model_type = gr.Dropdown(
choices=["Transformer", "RNN", "GPT", "Other"], label="Model Type", value="Transformer"
)
submit_button = gr.Button("Submit")
submission_output = gr.Markdown()
submit_button.click(
add_new_model,
inputs=[model_name, base_model, revision, precision, weight_type, model_type],
outputs=submission_output,
)
# Scheduler that periodically re-downloads a dataset snapshot into a local cache.
# Note: snapshot_download needs a full repo id; the bare "alibayram" namespace would not resolve.
scheduler = BackgroundScheduler()
scheduler.add_job(
    lambda: snapshot_download(
        repo_id="alibayram/yapay_zeka_turkce_mmlu_liderlik_tablosu",
        repo_type="dataset",
        local_dir="cache",
    ),
    "interval", seconds=1800,
)
scheduler.start()
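# A minimal sketch of also refreshing the in-memory tables on the same schedule. This is an
# assumption about the intended behaviour, not part of the original app: components created at
# build time keep their initial data, but callbacks such as filter_leaderboard() read these
# globals at call time and would see the refreshed values.
def reload_dataframes():
    global leaderboard_data, model_responses_data, section_results_data
    try:
        leaderboard_data = pd.read_parquet(LEADERBOARD_PATH)
        model_responses_data = pd.read_parquet(RESPONSES_PATH)
        section_results_data = pd.read_parquet(SECTION_RESULTS_PATH)
    except Exception as e:  # keep the previously loaded data if a refresh fails
        print(f"Error refreshing datasets: {e}")

scheduler.add_job(reload_dataframes, "interval", seconds=1800)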
# Launch app
app.queue(default_concurrency_limit=40).launch()