# Some code blocks are taken from https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/tree/main
import gradio as gr
import pandas as pd
from src.css_html import custom_css
from src.text_content import ABOUT_TEXT, SUBMISSION_TEXT_3
from src.utils import (
    AutoEvalColumn,
    fields,
    make_clickable_names,
)
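
# Load the precomputed evaluation results that back the leaderboard.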
df = pd.read_csv("data/code_eval_board.csv")
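
# Column metadata derived from AutoEvalColumn: the full set (COLS/TYPES) and
# the subset displayed by default (COLS_LITE/TYPES_LITE).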
COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
TYPES = [c.type for c in fields(AutoEvalColumn) if not c.hidden]
COLS_LITE = [
c.name for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden
]
TYPES_LITE = [
c.type for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden
]
def select_columns(df, columns):
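    """Restrict `df` to the requested columns, always keeping the model column."""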
always_here_cols = [
AutoEvalColumn.model.name,
]
    # Iterate over COLS, not `columns`, to preserve the canonical column order
filtered_df = df[
always_here_cols + [c for c in COLS if c in df.columns and c in columns]
]
return filtered_df
def filter_items(df, leaderboard_table, query):
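    """Filter rows by the "T" (model type) column.

    `query` is either "all" (no filtering) or a string whose first
    character is matched against the "T" column.
    """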
if query == "all":
return df[leaderboard_table.columns]
else:
query = query[0]
filtered_df = df[df["T"].str.contains(query, na=False)]
return filtered_df[leaderboard_table.columns]
def search_table(df, leaderboard_table, query):
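    """Return the rows whose "Model" value contains `query` (case-insensitive)."""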
    # Literal (non-regex) match so special characters in the query do not error.
    filtered_df = df[df["Model"].str.contains(query, case=False, na=False, regex=False)]
return filtered_df[leaderboard_table.columns]
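
# Render plain model names as clickable links before display.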
df = make_clickable_names(df)
demo = gr.Blocks(css=custom_css)
with demo:
with gr.Row():
gr.Markdown(
"""
ESPnet-EZ Leaderboard for LibriSpeech-100h ASR1
\
\
Users can use reproduce
function to reproduce the numbers in ESPnet-EZ!
""",
elem_classes="markdown-text",
)
with gr.Tabs(elem_classes="tab-buttons") as tabs:
with gr.TabItem("🔍 Evaluation table", id=0):
with gr.Accordion("➡️ See All Columns", open=False):
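                # Checkboxes for toggling optional columns; the model column is
                # always displayed, so it is excluded from the choices here.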
                shown_columns = gr.CheckboxGroup(
                    choices=[c for c in COLS if c != AutoEvalColumn.model.name],
                    value=[c for c in COLS_LITE if c != AutoEvalColumn.model.name],
                    label="",
                    elem_id="column-select",
                    interactive=True,
                )
with gr.Row():
search_bar = gr.Textbox(
placeholder="🔍 Search for your model and press ENTER...",
show_label=False,
elem_id="search-bar",
)
            leaderboard_df = gr.components.Dataframe(
                value=df[[AutoEvalColumn.model.name] + shown_columns.value],
                headers=[AutoEvalColumn.model.name] + shown_columns.value,
                datatype=TYPES,
                elem_id="leaderboard-table",
                interactive=False,
            )
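            # Hidden, fully populated copy of the table: the callbacks below
            # filter it and write the result into the visible dataframe.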
            hidden_leaderboard_df = gr.components.Dataframe(
                value=df,
                headers=COLS,
                datatype=["str"] * len(COLS),
                visible=False,
            )
search_bar.submit(
search_table,
[hidden_leaderboard_df, leaderboard_df, search_bar],
leaderboard_df,
)
shown_columns.change(
select_columns,
[hidden_leaderboard_df, shown_columns],
leaderboard_df,
)
gr.Markdown(
"""
**Notes:**
- Win Rate represents how often a model outperforms the other models, computed per language and averaged across all languages.
- Scores of instruction-tuned models may be significantly higher on humaneval-python than on the other languages, since we use the HumanEval instruction format for it and the base MultiPL-E prompts for the other languages.
- For more details, check the 📝 About section.
- Models marked with a 🔴 symbol are external evaluation submissions; we did not verify those results. You can find the author's submission under the `Submission PR` field in the `See All Columns` tab.
""",
elem_classes="markdown-text",
)
with gr.TabItem("📝 About", id=2):
gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text")
with gr.TabItem("Submit results 🚀", id=3):
gr.Markdown(SUBMISSION_TEXT_3)
if __name__ == "__main__":
    demo.launch()