xeon27
committed
Commit · ed6229f
1 Parent(s): 2b8ba97
Fix bug
app.py CHANGED
@@ -26,7 +26,7 @@ from src.display.utils import (
     Precision
 )
 from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
-from src.populate import get_evaluation_queue_df, get_leaderboard_df
+from src.populate import get_evaluation_queue_df, get_leaderboard_df, TASK_NAME_INVERSE_MAP
 from src.submission.submit import add_new_eval
 
 
@@ -59,20 +59,21 @@ AGENTIC_LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PAT
     pending_eval_queue_df,
 ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 
-def init_leaderboard(dataframe):
+def init_leaderboard(dataframe, benchmark_type):
     if dataframe is None or dataframe.empty:
         raise ValueError("Leaderboard DataFrame is empty or None.")
+    AutoEvalColumnSubset = [c for c in fields(AutoEvalColumn) if ((c.name=="Model") or (TASK_NAME_INVERSE_MAP.get(c.name, dict()).get("type", "")==benchmark_type))]
     return Leaderboard(
         value=dataframe,
-        datatype=[c.type for c in fields(AutoEvalColumn)],
+        datatype=[c.type for c in AutoEvalColumnSubset],
         select_columns=SelectColumns(
-            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
-            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
+            default_selection=[c.name for c in AutoEvalColumnSubset if c.displayed_by_default],
+            cant_deselect=[c.name for c in AutoEvalColumnSubset if c.never_hidden],
             label="Select Columns to Display:",
         ),
         # search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
         search_columns=[AutoEvalColumn.model.name,],
-        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
+        hide_columns=[c.name for c in AutoEvalColumnSubset if c.hidden],
         # filter_columns=[
         #     ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
         #     ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
@@ -100,10 +101,10 @@ with demo:
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("Single-turn Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-            leaderboard = init_leaderboard(ST_LEADERBOARD_DF)
+            leaderboard = init_leaderboard(ST_LEADERBOARD_DF, "single-turn")
 
         with gr.TabItem("Agentic Benchmark", elem_id="llm-benchmark-tab-table", id=1):
-            leaderboard = init_leaderboard(AGENTIC_LEADERBOARD_DF)
+            leaderboard = init_leaderboard(AGENTIC_LEADERBOARD_DF, "agentic")
 
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
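In short, the fix threads a benchmark_type argument through init_leaderboard so each tab renders only the columns belonging to its benchmark, looking up each column's task type in TASK_NAME_INVERSE_MAP. Below is a minimal, self-contained sketch of that filtering predicate; the task names and map contents are made up for illustration (the real AutoEvalColumn and TASK_NAME_INVERSE_MAP live in src/display/utils.py and src/populate.py).

    from dataclasses import dataclass

    @dataclass
    class ColumnContent:
        name: str
        type: str = "number"

    # Hypothetical columns standing in for fields(AutoEvalColumn).
    columns = [
        ColumnContent("Model", "markdown"),
        ColumnContent("GSM8K"),      # assumed single-turn task column
        ColumnContent("SWE-bench"),  # assumed agentic task column
    ]

    # Assumed shape: display column name -> metadata including the task "type".
    TASK_NAME_INVERSE_MAP = {
        "GSM8K": {"type": "single-turn"},
        "SWE-bench": {"type": "agentic"},
    }

    def column_subset(benchmark_type):
        # Same predicate as the diff: always keep "Model"; otherwise keep a
        # column only when its task type matches the requested benchmark.
        return [
            c for c in columns
            if c.name == "Model"
            or TASK_NAME_INVERSE_MAP.get(c.name, dict()).get("type", "") == benchmark_type
        ]

    print([c.name for c in column_subset("single-turn")])  # ['Model', 'GSM8K']
    print([c.name for c in column_subset("agentic")])      # ['Model', 'SWE-bench']

Because the lookup falls back to an empty dict and an empty type string, any column missing from the map is simply dropped from both tabs rather than raising a KeyError.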