alibayram committed on
Commit
a7fa922
·
1 Parent(s): 4ecae57

Enhance Gradio app with new filtering and plotting functionalities, improve model submission process, and restructure tabs for better user experience

Browse files
Files changed (1) hide show
  1. app.py +69 -48
app.py CHANGED
@@ -1,17 +1,13 @@
1
  import gradio as gr
2
- from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
3
- import pandas as pd
4
  from apscheduler.schedulers.background import BackgroundScheduler
5
  from huggingface_hub import snapshot_download
 
 
6
 
7
- # Define dataset paths and constants
8
  LEADERBOARD_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_liderlik_tablosu/data/train-00000-of-00001.parquet"
9
  RESPONSES_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_model_cevaplari/data/train-00000-of-00001.parquet"
10
  SECTION_RESULTS_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_bolum_sonuclari/data/train-00000-of-00001.parquet"
11
- REPO_ID = "alibayram"
12
- QUEUE_REPO = "queue-repo"
13
- RESULTS_REPO = "results-repo"
14
- TOKEN = "your_hf_token"
15
 
16
  # Load datasets
17
  try:
@@ -22,73 +18,98 @@ except Exception as e:
22
  print(f"Error loading datasets: {e}")
23
  raise
24
 
25
- # Initialize leaderboard
26
- def init_leaderboard(dataframe):
27
- if dataframe is None or dataframe.empty:
28
- raise ValueError("Leaderboard DataFrame is empty or None.")
29
- return Leaderboard(
30
- value=dataframe,
31
- datatype=[col.dtype.name for col in dataframe.dtypes],
32
- select_columns=SelectColumns(
33
- default_selection=["model", "basari", "toplam_sure"],
34
- label="Select Columns to Display",
35
- ),
36
- search_columns=["model"],
37
- filter_columns=[
38
- ColumnFilter("family", type="checkboxgroup", label="Model Family"),
39
- ColumnFilter("quantization_level", type="checkboxgroup", label="Quantization Level"),
40
- ],
41
- )
42
 
43
- # Refresh datasets
44
- def restart_space():
45
- snapshot_download(repo_id=QUEUE_REPO, local_dir="queue_cache", repo_type="dataset", token=TOKEN)
46
- snapshot_download(repo_id=RESULTS_REPO, local_dir="results_cache", repo_type="dataset", token=TOKEN)
 
 
 
 
47
 
48
- # Gradio app setup
49
- demo = gr.Blocks(css=".container { max-width: 1200px; margin: auto; }")
50
- with demo:
 
 
 
51
  gr.HTML("<h1>🏆 Turkish MMLU Leaderboard</h1>")
52
  gr.Markdown("Explore, evaluate, and compare AI model performance.")
53
 
54
- # Tabs for leaderboard, model responses, and submission
55
  with gr.Tabs() as tabs:
 
56
  with gr.TabItem("Leaderboard"):
57
- gr.Markdown("### Explore Leaderboard")
58
- leaderboard = init_leaderboard(leaderboard_data)
59
-
 
 
 
 
 
 
 
 
 
60
  with gr.TabItem("Model Responses"):
61
- gr.Markdown("### Model Responses")
62
  model_dropdown = gr.Dropdown(
63
  choices=leaderboard_data["model"].unique().tolist(), label="Select Model"
64
  )
65
- query_input = gr.Textbox(label="Search Questions")
66
  responses_table = gr.DataFrame()
67
  gr.Button("Search").click(
68
- lambda model, query: model_responses_data[model_responses_data["bolum"].str.contains(query)],
69
- inputs=[model_dropdown, query_input],
70
- outputs=responses_table,
71
  )
72
-
 
 
 
 
 
 
73
  with gr.TabItem("Submit Model"):
74
  gr.Markdown("### Submit Your Model for Evaluation")
75
  model_name = gr.Textbox(label="Model Name")
 
76
  revision = gr.Textbox(label="Revision", placeholder="main")
77
  precision = gr.Dropdown(
78
  choices=["float16", "int8", "bfloat16", "float32"], label="Precision", value="float16"
79
  )
 
 
 
 
 
 
80
  submit_button = gr.Button("Submit")
81
- submission_result = gr.Markdown()
82
  submit_button.click(
83
- lambda name, rev, prec: f"Submitted {name} with revision {rev} and precision {prec}.",
84
- inputs=[model_name, revision, precision],
85
- outputs=submission_result,
86
  )
87
 
88
- # Scheduler for automatic updates
89
  scheduler = BackgroundScheduler()
90
- scheduler.add_job(restart_space, "interval", seconds=1800)
 
 
 
91
  scheduler.start()
92
 
93
  # Launch app
94
- demo.queue(max_size=40).launch()
 
1
  import gradio as gr
 
 
2
  from apscheduler.schedulers.background import BackgroundScheduler
3
  from huggingface_hub import snapshot_download
4
+ import pandas as pd
5
+ import matplotlib.pyplot as plt
6
 
7
+ # Dataset paths
8
  LEADERBOARD_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_liderlik_tablosu/data/train-00000-of-00001.parquet"
9
  RESPONSES_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_model_cevaplari/data/train-00000-of-00001.parquet"
10
  SECTION_RESULTS_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_bolum_sonuclari/data/train-00000-of-00001.parquet"
 
 
 
 
11
 
12
  # Load datasets
13
  try:
 
18
  print(f"Error loading datasets: {e}")
19
  raise
20
 
21
+ # Helper functions
22
def filter_leaderboard(family=None, quantization_level=None):
    """Return a copy of the leaderboard, optionally narrowed by model family
    and/or quantization level.

    Args:
        family: exact value to match in the "family" column, or falsy to skip.
        quantization_level: exact value to match in the "quantization_level"
            column, or falsy to skip.

    Returns:
        A filtered copy of the module-level ``leaderboard_data`` DataFrame.
    """
    filtered = leaderboard_data.copy()
    # Apply each equality filter only when the caller supplied a truthy value.
    for column, wanted in (("family", family), ("quantization_level", quantization_level)):
        if wanted:
            filtered = filtered[filtered[column] == wanted]
    return filtered
29
+
30
def search_responses(query, model):
    """Search model responses by section name.

    Args:
        query: user-entered search text, matched case-insensitively against the
            "bolum" (section) column as a literal substring.
        model: model identifier; its answers are read from the
            ``f"{model}_cevap"`` column.

    Returns:
        The matching rows of the module-level ``model_responses_data``
        DataFrame, restricted to section, question, reference answer and the
        selected model's answer columns.
    """
    # regex=False: treat the query as literal text so characters like '(' or
    # '[' typed by a user cannot raise re.error.
    # na=False: rows with a missing "bolum" are excluded instead of yielding a
    # NaN mask entry that breaks boolean indexing.
    mask = model_responses_data["bolum"].str.contains(
        query, case=False, na=False, regex=False
    )
    selected_columns = ["bolum", "soru", "cevap", f"{model}_cevap"]
    return model_responses_data[mask][selected_columns]
 
 
 
 
34
 
35
def plot_section_results():
    """Build a bar chart of the mean of every numeric column in the
    module-level ``section_results_data`` DataFrame.

    Returns:
        The matplotlib Figure containing the chart (returned rather than
        shown, so Gradio's ``gr.Plot`` can render it).
    """
    figure, axis = plt.subplots(figsize=(10, 6))
    section_means = section_results_data.mean(numeric_only=True)
    section_means.plot(kind="bar", ax=axis)
    axis.set_title("Average Section-Wise Performance")
    axis.set_ylabel("Accuracy (%)")
    axis.set_xlabel("Sections")
    return figure
43
 
44
def add_new_model(model_name, base_model, revision, precision, weight_type, model_type):
    """Acknowledge a model submission.

    NOTE(review): this is simulated submission logic — no evaluation job is
    actually queued. Only ``model_name`` appears in the confirmation; the
    remaining fields are accepted for interface completeness.

    Returns:
        A human-readable confirmation string.
    """
    acknowledgement = "Model '{}' submitted successfully!".format(model_name)
    return acknowledgement
47
+
48
# Gradio app structure: three data tabs plus a submission form, all wired to
# the module-level DataFrames and helper functions defined above.
with gr.Blocks(css=".container { max-width: 1200px; margin: auto; }") as app:
    gr.HTML("<h1>🏆 Turkish MMLU Leaderboard</h1>")
    gr.Markdown("Explore, evaluate, and compare AI model performance.")

    with gr.Tabs() as tabs:
        # Leaderboard Tab: full table with optional family/quantization filters.
        with gr.TabItem("Leaderboard"):
            family_filter = gr.Dropdown(
                choices=leaderboard_data["family"].unique().tolist(), label="Filter by Family", multiselect=False
            )
            quantization_filter = gr.Dropdown(
                choices=leaderboard_data["quantization_level"].unique().tolist(), label="Filter by Quantization Level"
            )
            # Initially shows the unfiltered leaderboard; replaced on click.
            leaderboard_table = gr.DataFrame(leaderboard_data)
            gr.Button("Apply Filters").click(
                filter_leaderboard, inputs=[family_filter, quantization_filter], outputs=leaderboard_table
            )

        # Model Responses Tab: per-model answer lookup by section substring.
        with gr.TabItem("Model Responses"):
            model_dropdown = gr.Dropdown(
                choices=leaderboard_data["model"].unique().tolist(), label="Select Model"
            )
            query_input = gr.Textbox(label="Search Query")
            responses_table = gr.DataFrame()
            gr.Button("Search").click(
                search_responses, inputs=[query_input, model_dropdown], outputs=responses_table
            )

        # Section Results Tab: static chart of mean section scores + raw table.
        with gr.TabItem("Section Results"):
            gr.Plot(plot_section_results)
            gr.DataFrame(section_results_data)

        # Submit Model Tab: collects metadata and echoes a (simulated)
        # submission confirmation via add_new_model.
        with gr.TabItem("Submit Model"):
            gr.Markdown("### Submit Your Model for Evaluation")
            model_name = gr.Textbox(label="Model Name")
            base_model = gr.Textbox(label="Base Model")
            revision = gr.Textbox(label="Revision", placeholder="main")
            precision = gr.Dropdown(
                choices=["float16", "int8", "bfloat16", "float32"], label="Precision", value="float16"
            )
            weight_type = gr.Dropdown(
                choices=["Original", "Delta", "Adapter"], label="Weight Type", value="Original"
            )
            model_type = gr.Dropdown(
                choices=["Transformer", "RNN", "GPT", "Other"], label="Model Type", value="Transformer"
            )
            submit_button = gr.Button("Submit")
            submission_output = gr.Markdown()
            submit_button.click(
                add_new_model,
                inputs=[model_name, base_model, revision, precision, weight_type, model_type],
                outputs=submission_output,
            )

    # Scheduler for refreshing datasets: re-downloads the dataset snapshot
    # every 30 minutes (1800 s) in a background thread.
    # NOTE(review): repo_id="alibayram" is a bare username, but
    # snapshot_download expects a "user/dataset_name" repo id — confirm this
    # job actually succeeds at runtime.
    scheduler = BackgroundScheduler()
    scheduler.add_job(
        lambda: snapshot_download(repo_id="alibayram", repo_type="dataset", local_dir="cache"),
        "interval", seconds=1800
    )
    scheduler.start()

# Launch app with up to 40 concurrent event handlers.
app.queue(default_concurrency_limit=40).launch()