"""Gradio app that serves the Turkish MMLU leaderboard.

The script loads three parquet datasets from the Hugging Face Hub, builds a
three-tab UI (leaderboard, model responses, model submission), schedules a
periodic snapshot refresh and launches the app.
"""

import os

import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from gradio_leaderboard import ColumnFilter, Leaderboard, SelectColumns
from huggingface_hub import snapshot_download

# Define dataset paths and constants
LEADERBOARD_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_liderlik_tablosu/data/train-00000-of-00001.parquet"
RESPONSES_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_model_cevaplari/data/train-00000-of-00001.parquet"
SECTION_RESULTS_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_bolum_sonuclari/data/train-00000-of-00001.parquet"
REPO_ID = "alibayram"
QUEUE_REPO = "queue-repo"
RESULTS_REPO = "results-repo"
# Read the access token from the environment instead of hard-coding a
# credential in the source. When HF_TOKEN is unset this is None.
TOKEN = os.environ.get("HF_TOKEN")

# Load datasets
try:
    leaderboard_data = pd.read_parquet(LEADERBOARD_PATH)
    model_responses_data = pd.read_parquet(RESPONSES_PATH)
    section_results_data = pd.read_parquet(SECTION_RESULTS_PATH)
except Exception as e:
    # Top-level boundary: report the failure and abort start-up.
    print(f"Error loading datasets: {e}")
    raise


def _gradio_datatype(dtype) -> str:
    """Map a pandas/numpy dtype to a Gradio column datatype name."""
    # bool must be tested first: pandas also reports bool as numeric.
    if pd.api.types.is_bool_dtype(dtype):
        return "bool"
    if pd.api.types.is_numeric_dtype(dtype):
        return "number"
    if pd.api.types.is_datetime64_any_dtype(dtype):
        return "date"
    return "str"


# Initialize leaderboard
def init_leaderboard(dataframe):
    """Build the ``Leaderboard`` component for *dataframe*.

    Args:
        dataframe: The leaderboard table to display.

    Returns:
        A configured ``Leaderboard`` component.

    Raises:
        ValueError: If *dataframe* is ``None`` or empty.
    """
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")
    return Leaderboard(
        value=dataframe,
        # Iterating ``dataframe.dtypes`` yields dtype objects directly; they
        # are translated to Gradio datatype names rather than pandas names.
        datatype=[_gradio_datatype(dtype) for dtype in dataframe.dtypes],
        select_columns=SelectColumns(
            default_selection=["model", "basari", "toplam_sure"],
            label="Select Columns to Display",
        ),
        search_columns=["model"],
        filter_columns=[
            ColumnFilter("family", type="checkboxgroup", label="Model Family"),
            ColumnFilter(
                "quantization_level",
                type="checkboxgroup",
                label="Quantization Level",
            ),
        ],
    )


# Refresh datasets
def restart_space():
    """Download snapshots of the queue and results dataset repos.

    The snapshots are written to the local ``queue_cache`` and
    ``results_cache`` directories.
    """
    snapshot_download(
        repo_id=QUEUE_REPO,
        local_dir="queue_cache",
        repo_type="dataset",
        token=TOKEN,
    )
    snapshot_download(
        repo_id=RESULTS_REPO,
        local_dir="results_cache",
        repo_type="dataset",
        token=TOKEN,
    )


def search_responses(model, query):
    """Return the model-response rows whose ``bolum`` contains *query*.

    The match is a literal substring test, so characters such as ``(`` or
    ``*`` in user input are not interpreted as a regular expression, and
    rows with a missing ``bolum`` value are treated as non-matching.
    """
    # NOTE(review): ``model`` is accepted because the dropdown is wired as an
    # input, but it is not used for filtering; the schema of the responses
    # dataset is not visible here - confirm how rows relate to models.
    mask = model_responses_data["bolum"].str.contains(
        query or "", regex=False, na=False
    )
    return model_responses_data[mask]


def describe_submission(name, rev, prec):
    """Return the confirmation message shown after a submission."""
    return f"Submitted {name} with revision {rev} and precision {prec}."


# Gradio app setup
demo = gr.Blocks(css=".container { max-width: 1200px; margin: auto; }")

with demo:
    gr.HTML("<h1>🏆 Turkish MMLU Leaderboard</h1>")
    gr.Markdown("Explore, evaluate, and compare AI model performance.")

    # Tabs for leaderboard, model responses, and submission
    with gr.Tabs() as tabs:
        with gr.TabItem("Leaderboard"):
            gr.Markdown("### Explore Leaderboard")
            leaderboard = init_leaderboard(leaderboard_data)

        with gr.TabItem("Model Responses"):
            gr.Markdown("### Model Responses")
            model_dropdown = gr.Dropdown(
                choices=leaderboard_data["model"].unique().tolist(),
                label="Select Model",
            )
            query_input = gr.Textbox(label="Search Questions")
            responses_table = gr.DataFrame()
            gr.Button("Search").click(
                search_responses,
                inputs=[model_dropdown, query_input],
                outputs=responses_table,
            )

        with gr.TabItem("Submit Model"):
            gr.Markdown("### Submit Your Model for Evaluation")
            model_name = gr.Textbox(label="Model Name")
            revision = gr.Textbox(label="Revision", placeholder="main")
            precision = gr.Dropdown(
                choices=["float16", "int8", "bfloat16", "float32"],
                label="Precision",
                value="float16",
            )
            submit_button = gr.Button("Submit")
            submission_result = gr.Markdown()
            submit_button.click(
                describe_submission,
                inputs=[model_name, revision, precision],
                outputs=submission_result,
            )

# Scheduler for automatic updates
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()

# Launch app
demo.queue(max_size=40).launch()