alibayram committed on
Commit
4ecae57
·
1 Parent(s): 8e219a6

Refactor Gradio app to enhance leaderboard functionality, improve model response search, and streamline model submission process

Browse files
Files changed (1) hide show
  1. app.py +48 -69
app.py CHANGED
@@ -1,13 +1,17 @@
1
  import gradio as gr
 
 
2
  from apscheduler.schedulers.background import BackgroundScheduler
3
  from huggingface_hub import snapshot_download
4
- import pandas as pd
5
- import matplotlib.pyplot as plt
6
 
7
- # Dataset paths
8
  LEADERBOARD_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_liderlik_tablosu/data/train-00000-of-00001.parquet"
9
  RESPONSES_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_model_cevaplari/data/train-00000-of-00001.parquet"
10
  SECTION_RESULTS_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_bolum_sonuclari/data/train-00000-of-00001.parquet"
 
 
 
 
11
 
12
  # Load datasets
13
  try:
@@ -18,98 +22,73 @@ except Exception as e:
18
  print(f"Error loading datasets: {e}")
19
  raise
20
 
21
- # Helper functions
22
- def filter_leaderboard(family=None, quantization_level=None):
23
- df = leaderboard_data.copy()
24
- if family:
25
- df = df[df["family"] == family]
26
- if quantization_level:
27
- df = df[df["quantization_level"] == quantization_level]
28
- return df
29
-
30
- def search_responses(query, model):
31
- filtered = model_responses_data[model_responses_data["bolum"].str.contains(query, case=False)]
32
- selected_columns = ["bolum", "soru", "cevap", model + "_cevap"]
33
- return filtered[selected_columns]
 
 
 
 
34
 
35
- def plot_section_results():
36
- fig, ax = plt.subplots(figsize=(10, 6))
37
- avg_scores = section_results_data.mean(numeric_only=True)
38
- avg_scores.plot(kind="bar", ax=ax)
39
- ax.set_title("Average Section-Wise Performance")
40
- ax.set_ylabel("Accuracy (%)")
41
- ax.set_xlabel("Sections")
42
- return fig # Return the figure object
43
 
44
- def add_new_model(model_name, base_model, revision, precision, weight_type, model_type):
45
- # Simulated model submission logic
46
- return f"Model '{model_name}' submitted successfully!"
47
-
48
- # Gradio app structure
49
- with gr.Blocks(css=".container { max-width: 1200px; margin: auto; }") as app:
50
  gr.HTML("<h1>🏆 Turkish MMLU Leaderboard</h1>")
51
  gr.Markdown("Explore, evaluate, and compare AI model performance.")
52
 
 
53
  with gr.Tabs() as tabs:
54
- # Leaderboard Tab
55
  with gr.TabItem("Leaderboard"):
56
- family_filter = gr.Dropdown(
57
- choices=leaderboard_data["family"].unique().tolist(), label="Filter by Family", multiselect=False
58
- )
59
- quantization_filter = gr.Dropdown(
60
- choices=leaderboard_data["quantization_level"].unique().tolist(), label="Filter by Quantization Level"
61
- )
62
- leaderboard_table = gr.DataFrame(leaderboard_data)
63
- gr.Button("Apply Filters").click(
64
- filter_leaderboard, inputs=[family_filter, quantization_filter], outputs=leaderboard_table
65
- )
66
-
67
- # Model Responses Tab
68
  with gr.TabItem("Model Responses"):
 
69
  model_dropdown = gr.Dropdown(
70
  choices=leaderboard_data["model"].unique().tolist(), label="Select Model"
71
  )
72
- query_input = gr.Textbox(label="Search Query")
73
  responses_table = gr.DataFrame()
74
  gr.Button("Search").click(
75
- search_responses, inputs=[query_input, model_dropdown], outputs=responses_table
 
 
76
  )
77
-
78
- # Section Results Tab
79
- with gr.TabItem("Section Results"):
80
- gr.Plot(plot_section_results)
81
- gr.DataFrame(section_results_data)
82
-
83
- # Submit Model Tab
84
  with gr.TabItem("Submit Model"):
85
  gr.Markdown("### Submit Your Model for Evaluation")
86
  model_name = gr.Textbox(label="Model Name")
87
- base_model = gr.Textbox(label="Base Model")
88
  revision = gr.Textbox(label="Revision", placeholder="main")
89
  precision = gr.Dropdown(
90
  choices=["float16", "int8", "bfloat16", "float32"], label="Precision", value="float16"
91
  )
92
- weight_type = gr.Dropdown(
93
- choices=["Original", "Delta", "Adapter"], label="Weight Type", value="Original"
94
- )
95
- model_type = gr.Dropdown(
96
- choices=["Transformer", "RNN", "GPT", "Other"], label="Model Type", value="Transformer"
97
- )
98
  submit_button = gr.Button("Submit")
99
- submission_output = gr.Markdown()
100
  submit_button.click(
101
- add_new_model,
102
- inputs=[model_name, base_model, revision, precision, weight_type, model_type],
103
- outputs=submission_output,
104
  )
105
 
106
- # Scheduler for refreshing datasets
107
  scheduler = BackgroundScheduler()
108
- scheduler.add_job(
109
- lambda: snapshot_download(repo_id="alibayram", repo_type="dataset", local_dir="cache"),
110
- "interval", seconds=1800
111
- )
112
  scheduler.start()
113
 
114
  # Launch app
115
- app.queue(concurrency_count=40).launch()
 
1
  import gradio as gr
2
+ from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
3
+ import pandas as pd
4
  from apscheduler.schedulers.background import BackgroundScheduler
5
  from huggingface_hub import snapshot_download
 
 
6
 
7
+ # Define dataset paths and constants
8
  LEADERBOARD_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_liderlik_tablosu/data/train-00000-of-00001.parquet"
9
  RESPONSES_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_model_cevaplari/data/train-00000-of-00001.parquet"
10
  SECTION_RESULTS_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_bolum_sonuclari/data/train-00000-of-00001.parquet"
11
+ REPO_ID = "alibayram"
12
+ QUEUE_REPO = "queue-repo"
13
+ RESULTS_REPO = "results-repo"
14
+ TOKEN = "your_hf_token"
15
 
16
  # Load datasets
17
  try:
 
22
  print(f"Error loading datasets: {e}")
23
  raise
24
 
25
# Initialize leaderboard
def init_leaderboard(dataframe):
    """Build the gradio_leaderboard Leaderboard component from the dataset.

    Parameters:
        dataframe: pandas DataFrame backing the leaderboard table.

    Returns:
        A configured ``Leaderboard`` component.

    Raises:
        ValueError: if ``dataframe`` is None or empty, so a failed dataset
            load surfaces immediately instead of rendering a blank board.
    """
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")
    return Leaderboard(
        value=dataframe,
        # BUG FIX: ``dataframe.dtypes`` yields dtype objects directly; they
        # expose ``.name`` but have no ``.dtype`` attribute, so the original
        # ``col.dtype.name`` raised AttributeError for any non-empty frame.
        datatype=[dtype.name for dtype in dataframe.dtypes],
        select_columns=SelectColumns(
            default_selection=["model", "basari", "toplam_sure"],
            label="Select Columns to Display",
        ),
        search_columns=["model"],
        filter_columns=[
            ColumnFilter("family", type="checkboxgroup", label="Model Family"),
            ColumnFilter("quantization_level", type="checkboxgroup", label="Quantization Level"),
        ],
    )
42
 
43
# Refresh datasets
def restart_space():
    """Re-download the queue and results dataset repos into local caches."""
    targets = (
        (QUEUE_REPO, "queue_cache"),
        (RESULTS_REPO, "results_cache"),
    )
    for repo, cache_dir in targets:
        snapshot_download(repo_id=repo, local_dir=cache_dir, repo_type="dataset", token=TOKEN)
 
 
 
 
47
 
48
# Gradio app setup
demo = gr.Blocks(css=".container { max-width: 1200px; margin: auto; }")
with demo:
    gr.HTML("<h1>🏆 Turkish MMLU Leaderboard</h1>")
    gr.Markdown("Explore, evaluate, and compare AI model performance.")

    # Tabs for leaderboard, model responses, and submission
    with gr.Tabs() as tabs:
        with gr.TabItem("Leaderboard"):
            gr.Markdown("### Explore Leaderboard")
            leaderboard = init_leaderboard(leaderboard_data)

        with gr.TabItem("Model Responses"):
            gr.Markdown("### Model Responses")
            model_dropdown = gr.Dropdown(
                choices=leaderboard_data["model"].unique().tolist(), label="Select Model"
            )
            query_input = gr.Textbox(label="Search Questions")
            responses_table = gr.DataFrame()

            def _search_responses(model, query):
                """Filter responses by section name and show the chosen model's answers.

                BUG FIX: the original lambda accepted ``model`` but never used
                it, so the selected model's answer column was never shown, and
                ``str.contains(query)`` was case-sensitive and NaN-unsafe.
                """
                mask = model_responses_data["bolum"].str.contains(query, case=False, na=False)
                filtered = model_responses_data[mask]
                # Show the question context plus the selected model's answer column.
                return filtered[["bolum", "soru", "cevap", f"{model}_cevap"]]

            gr.Button("Search").click(
                _search_responses,
                inputs=[model_dropdown, query_input],
                outputs=responses_table,
            )

        with gr.TabItem("Submit Model"):
            gr.Markdown("### Submit Your Model for Evaluation")
            model_name = gr.Textbox(label="Model Name")
            revision = gr.Textbox(label="Revision", placeholder="main")
            precision = gr.Dropdown(
                choices=["float16", "int8", "bfloat16", "float32"], label="Precision", value="float16"
            )
            submit_button = gr.Button("Submit")
            submission_result = gr.Markdown()
            submit_button.click(
                lambda name, rev, prec: f"Submitted {name} with revision {rev} and precision {prec}.",
                inputs=[model_name, revision, precision],
                outputs=submission_result,
            )

# Scheduler for automatic updates: re-sync datasets every 30 minutes.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()

# Launch app
demo.queue(max_size=40).launch()