import gradio as gr
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import snapshot_download
import pandas as pd
import matplotlib.pyplot as plt
# Dataset paths
LEADERBOARD_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_liderlik_tablosu/data/train-00000-of-00001.parquet"
RESPONSES_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_model_cevaplari/data/train-00000-of-00001.parquet"
SECTION_RESULTS_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_bolum_sonuclari/data/train-00000-of-00001.parquet"
# Load datasets
try:
    leaderboard_data = pd.read_parquet(LEADERBOARD_PATH)
    model_responses_data = pd.read_parquet(RESPONSES_PATH)
    section_results_data = pd.read_parquet(SECTION_RESULTS_PATH)
except Exception as e:
    print(f"Error loading datasets: {e}")
    raise
# Helper functions
def filter_leaderboard(family=None, quantization_level=None):
    df = leaderboard_data.copy()
    if family:
        df = df[df["family"] == family]
    if quantization_level:
        df = df[df["quantization_level"] == quantization_level]
    return df
def search_responses(query, model):
    filtered = model_responses_data[
        model_responses_data["bolum"].str.contains(query, case=False, na=False)
    ]
    # Keep only columns that actually exist, in case the selected model has no answer column.
    selected_columns = [c for c in ["bolum", "soru", "cevap", f"{model}_cevap"] if c in filtered.columns]
    return filtered[selected_columns]
def plot_section_results():
    fig, ax = plt.subplots(figsize=(10, 6))
    avg_scores = section_results_data.mean(numeric_only=True)
    avg_scores.plot(kind="bar", ax=ax)
    ax.set_title("Average Section-Wise Performance")
    ax.set_ylabel("Accuracy (%)")
    ax.set_xlabel("Sections")
    return fig  # Return the figure object
def add_new_model(model_name, base_model, revision, precision, weight_type, model_type):
    # Placeholder submission logic; a hedged persistence sketch follows below.
    return f"Model '{model_name}' submitted successfully!"
# Gradio app structure
with gr.Blocks(css=".container { max-width: 1200px; margin: auto; }") as app:
    gr.HTML("<h1>🏆 Turkish MMLU Leaderboard</h1>")
    gr.Markdown("Explore, evaluate, and compare AI model performance.")
    with gr.Tabs() as tabs:
        # Leaderboard Tab
        with gr.TabItem("Leaderboard"):
            family_filter = gr.Dropdown(
                choices=leaderboard_data["family"].unique().tolist(),
                label="Filter by Family",
                multiselect=False,
            )
            quantization_filter = gr.Dropdown(
                choices=leaderboard_data["quantization_level"].unique().tolist(),
                label="Filter by Quantization Level",
            )
            leaderboard_table = gr.DataFrame(leaderboard_data)
            gr.Button("Apply Filters").click(
                filter_leaderboard,
                inputs=[family_filter, quantization_filter],
                outputs=leaderboard_table,
            )
        # Model Responses Tab
        with gr.TabItem("Model Responses"):
            model_dropdown = gr.Dropdown(
                choices=leaderboard_data["model"].unique().tolist(),
                label="Select Model",
            )
            query_input = gr.Textbox(label="Search Query")
            responses_table = gr.DataFrame()
            gr.Button("Search").click(
                search_responses,
                inputs=[query_input, model_dropdown],
                outputs=responses_table,
            )
        # Section Results Tab
        with gr.TabItem("Section Results"):
            gr.Plot(plot_section_results)
            gr.DataFrame(section_results_data)
        # Submit Model Tab
        with gr.TabItem("Submit Model"):
            gr.Markdown("### Submit Your Model for Evaluation")
            model_name = gr.Textbox(label="Model Name")
            base_model = gr.Textbox(label="Base Model")
            revision = gr.Textbox(label="Revision", placeholder="main")
            precision = gr.Dropdown(
                choices=["float16", "int8", "bfloat16", "float32"],
                label="Precision",
                value="float16",
            )
            weight_type = gr.Dropdown(
                choices=["Original", "Delta", "Adapter"],
                label="Weight Type",
                value="Original",
            )
            model_type = gr.Dropdown(
                choices=["Transformer", "RNN", "GPT", "Other"],
                label="Model Type",
                value="Transformer",
            )
            submit_button = gr.Button("Submit")
            submission_output = gr.Markdown()
            submit_button.click(
                add_new_model,
                inputs=[model_name, base_model, revision, precision, weight_type, model_type],
                outputs=submission_output,
            )
# Scheduler for refreshing datasets
scheduler = BackgroundScheduler()
# Refresh the cached snapshot of the leaderboard dataset every 30 minutes.
scheduler.add_job(
    lambda: snapshot_download(
        repo_id="alibayram/yapay_zeka_turkce_mmlu_liderlik_tablosu",
        repo_type="dataset",
        local_dir="cache",
    ),
    "interval",
    seconds=1800,
)
scheduler.start()
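
# A hedged sketch (an assumption, not part of the app above): the snapshot job only refreshes
# the on-disk cache, so this second job re-reads the parquet files to keep the in-memory
# dataframes current without restarting the Space.
def refresh_datasets():
    global leaderboard_data, model_responses_data, section_results_data
    try:
        leaderboard_data = pd.read_parquet(LEADERBOARD_PATH)
        model_responses_data = pd.read_parquet(RESPONSES_PATH)
        section_results_data = pd.read_parquet(SECTION_RESULTS_PATH)
    except Exception as e:
        print(f"Error refreshing datasets: {e}")


scheduler.add_job(refresh_datasets, "interval", seconds=1800)
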
# Launch app
app.queue(default_concurrency_limit=40).launch()