import pandas as pd | |
from datasets import load_dataset | |
import gradio as gr | |
from constants import RESULTS_REPO, ASSAY_RENAME, LEADERBOARD_RESULTS_COLUMNS | |
# Lift pandas' display limit on the number of columns (None = no limit),
# so wide DataFrames are shown in full when printed.
pd.options.display.max_columns = None
def show_output_box(message):
    """Return a Gradio update that makes a component visible with ``message`` as its value."""
    box_update = gr.update(visible=True, value=message)
    return box_update
def fetch_hf_results():
    """Load the leaderboard metrics table and keep the latest submissions.

    Reads ``auto_submissions/metrics_all.csv`` from ``RESULTS_REPO`` via
    ``load_dataset``, takes its ``"train"`` split as a pandas DataFrame,
    checks that every column named in ``LEADERBOARD_RESULTS_COLUMNS`` is
    present, keeps only the most recent row (by ``submission_time``) for each
    ``(model, assay, user)`` combination, and adds a ``property`` column by
    mapping ``assay`` through ``ASSAY_RENAME``.

    Returns:
        The filtered DataFrame, sorted by ``submission_time`` descending.

    Raises:
        AssertionError: If any column in ``LEADERBOARD_RESULTS_COLUMNS`` is
            missing from the loaded data. The check is an ``assert``, so it
            is skipped when Python runs with ``-O``.
    """
    # For debugging
    # # Print current time in EST
    # EST = timezone(timedelta(hours=-4))
    # print(f"tmp: Fetching results from HF at {datetime.now(EST)}")
    # Should cache by default if not using force_redownload
    df = load_dataset(
        RESULTS_REPO,
        data_files="auto_submissions/metrics_all.csv",
    )["train"].to_pandas()

    # Compute the missing columns once and reuse them in the message. The
    # previous message referenced an undefined name, so a failed check raised
    # NameError instead of reporting which columns were absent.
    missing_columns = [
        col for col in LEADERBOARD_RESULTS_COLUMNS if col not in df.columns
    ]
    assert not missing_columns, (
        f"Expected columns {LEADERBOARD_RESULTS_COLUMNS} not found in {df.columns}. "
        f"Missing columns: {missing_columns}"
    )

    # Show latest submission only: newest rows first, then keep the first row
    # seen for each (model, assay, user) key.
    df = df.sort_values("submission_time", ascending=False).drop_duplicates(
        subset=["model", "assay", "user"], keep="first"
    )
    # NOTE(review): assumes ASSAY_RENAME covers every assay value; with a
    # dict-like mapping, unmapped assays would become NaN here. Confirm.
    df["property"] = df["assay"].map(ASSAY_RENAME)
    return df