Spaces:

allenai
/

super_leaderboard

Running

File size: 3,589 Bytes

507ce38

"""A gradio app that renders a static leaderboard. This is used for Hugging Face Space."""
import argparse
import json
from datetime import datetime

import gradio as gr
import pandas as pd
import pytz

from constants import *
from constants import column_names

# Placeholder for a "last updated" timestamp. It is never given a real value
# at module level here; build_demo() formats its own timestamp locally.
LAST_UPDATED = None

# The intro section is currently disabled, so its markdown is left empty.
INTRO_MD = ""

# Header markdown rendered at the top of the page. Decode as UTF-8 explicitly
# so non-ASCII characters do not depend on the platform's default encoding.
with open("_header.md", "r", encoding="utf-8") as f:
    HEADER_MD = f.read()

# Populated by data_load(): the parsed JSON rows and the DataFrame built from them.
raw_data = None
original_df = None


def df_filters(mode_selection_radio, show_open_source_model_only):
    """Return a copy of the leaderboard table with a leading 1-based rank column.

    Args:
        mode_selection_radio: Currently ignored; kept for interface compatibility.
        show_open_source_model_only: Currently ignored; kept for interface
            compatibility.

    Returns:
        A new DataFrame whose first column (named "") numbers the rows 1..N in
        their current order, followed by the columns of the module-level
        `original_df`.
    """
    # Work on a copy: inserting into the shared frame would change it for every
    # later caller and make a second call fail because the "" column already exists.
    ranked = original_df.copy()
    ranked.insert(0, "", range(1, 1 + len(ranked)))
    return ranked

def _gstr(text):
    """Wrap `text` in a gradio Text component created with visible=False."""
    hidden_box = gr.Text(text, visible=False)
    return hidden_box

def _tab_leaderboard():
    """Create the read-only leaderboard table component.

    The table content comes from `df_filters` called with the default "greedy"
    mode and the open-source-only flag off. Called by build_demo() from inside
    the leaderboard tab. Returns nothing.
    """
    table_data = df_filters("greedy", False)

    # NOTE(review): `datatype` lists 4 entries while `column_widths` lists 10;
    # confirm both against the columns actually present in the results file.
    gr.components.Dataframe(
        value=table_data,
        datatype=["number", "markdown", "markdown", "number"],
        height=1000,
        elem_id="leaderboard-table",
        interactive=False,
        visible=True,
        column_widths=[50, 150, 150, 100, 120, 120, 100, 100, 110, 100],
        wrap=True,
    )

def _tab_submit():
    """Render the submission instructions as a markdown component."""
    instructions = """
    Please create an issue on our [Github](https://github.com/allenai/super-benchmark) repository with output of trajectories of your model and results. We will update the leaderboard accordingly.
    """

    gr.Markdown(
        f"## 🚀 Submit Your Results\n\n{instructions}",
        elem_classes="markdown-text",
    )



def build_demo():
    """Assemble the gradio Blocks app: header markdown plus two tabs.

    Returns:
        The constructed `gr.Blocks` object.
    """
    with gr.Blocks(theme=gr.themes.Soft(), css=css, js=js_light) as demo:
        # The header's {LAST_UPDATED} placeholder is filled with the moment the
        # app is built, formatted in the US/Pacific time zone.
        pacific = pytz.timezone('US/Pacific')
        built_at = datetime.now(pacific).strftime("%Y-%m-%d %H:%M:%S")
        gr.Markdown(
            HEADER_MD.replace("{LAST_UPDATED}", built_at),
            elem_classes="markdown-text",
        )

        with gr.Tabs(elem_classes="tab-buttons"):
            # NOTE(review): both tabs use the same elem_id — confirm this is intended.
            with gr.TabItem("🏅 Leaderboard", elem_id="od-benchmark-tab-table", id=0):
                _tab_leaderboard()
            with gr.TabItem("🚀 Submit Your Results", elem_id="od-benchmark-tab-table", id=3):
                _tab_submit()

    return demo



def _floatify(value):
    """Return `value` converted by `float()` when possible, else `value` unchanged."""
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        # Not numeric (e.g. a model name, None, a nested structure) or too
        # large for a float: keep the original value as-is.
        return value


def data_load(result_file):
    """Load the results JSON and populate the module-level `raw_data` / `original_df`.

    Every field of every row is converted to float where `float()` accepts it;
    other values are kept unchanged. The resulting table is sorted by the
    "Expert (Accuracy)" column in descending order.

    Args:
        result_file: Path to a JSON file containing a list of row objects.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the rows have no "Expert (Accuracy)" field.
    """
    global raw_data, original_df
    print(f"Loading {result_file}")

    with open(result_file, "r", encoding="utf-8") as f:
        rows = json.load(f)

    raw_data = [
        {key: _floatify(value) for key, value in row.items()}
        for row in rows
    ]
    original_df = pd.DataFrame(raw_data).sort_values(
        by="Expert (Accuracy)", ascending=False
    )


if __name__ == "__main__":
    # Script entry point: parse the CLI flags, load the results table, then
    # build and launch the gradio app.
    cli = argparse.ArgumentParser()
    cli.add_argument("--share", action="store_true")
    cli.add_argument(
        "--result_file",
        help="Path to results table",
        default="ZeroEval-main/result_dirs/leaderboard.json",
    )
    options = cli.parse_args()

    data_load(options.result_file)

    demo = build_demo()
    demo.launch(share=options.share, height=3000, width="100%")