Freddy Boulton
committed on
Commit
·
0e24f43
1
Parent(s):
1f0969f
my version
Browse files
- app.py +34 -25
- constants.py +1 -1
- utils.py +4 -3
app.py
CHANGED
|
@@ -3,6 +3,9 @@ import pandas as pd
|
|
| 3 |
import gradio as gr
|
| 4 |
from gradio.themes.utils import sizes
|
| 5 |
from gradio_leaderboard import Leaderboard
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
from about import ABOUT_INTRO, ABOUT_TEXT, FAQS, SUBMIT_INTRUCTIONS
|
| 8 |
from constants import (
|
|
@@ -49,6 +52,7 @@ def get_leaderboard_object(assay: str | None = None):
|
|
| 49 |
# TODO how to sort filter columns alphabetically?
|
| 50 |
# Bug: Can't leave search_columns empty because then it says "Column None not found in headers"
|
| 51 |
# Note(Lood): Would be nice to make it clear that the Search Column is searching on model name
|
|
|
|
| 52 |
lb = Leaderboard(
|
| 53 |
value=format_leaderboard_table(df_results=current_dataframe, assay=assay),
|
| 54 |
datatype=["str", "str", "str", "number", "str"],
|
|
@@ -64,7 +68,33 @@ def get_leaderboard_object(assay: str | None = None):
|
|
| 64 |
|
| 65 |
|
| 66 |
# Initialize global dataframe
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
# Lood: Two problems currently:
|
| 70 |
# 1. The data_version state value isn't being incremented, it seems (even though it's triggering the dataframe change correctly)
|
|
@@ -73,22 +103,6 @@ current_dataframe = fetch_hf_results()
|
|
| 73 |
# Make font size bigger using gradio theme
|
| 74 |
with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
|
| 75 |
timer = gr.Timer(3) # Run every 3 seconds when page is focused
|
| 76 |
-
data_version = gr.State(value=0) # Track data changes
|
| 77 |
-
|
| 78 |
-
def update_current_dataframe():
|
| 79 |
-
global current_dataframe
|
| 80 |
-
new_dataframe = fetch_hf_results()
|
| 81 |
-
new_hash = hashlib.sha256(pd.util.hash_pandas_object(new_dataframe).values).hexdigest()
|
| 82 |
-
|
| 83 |
-
# Check if data has actually changed
|
| 84 |
-
if new_hash != data_version.value:
|
| 85 |
-
print(f"TMP Dataframe has changed at {get_time()}. Old hash: {str(data_version.value)[:8]}, new hash: {str(new_hash)[:8]}")
|
| 86 |
-
current_dataframe = new_dataframe
|
| 87 |
-
data_version.value = new_hash # Increment version to trigger updates
|
| 88 |
-
return new_hash
|
| 89 |
-
return data_version.value
|
| 90 |
-
|
| 91 |
-
timer.tick(fn=update_current_dataframe, outputs=data_version)
|
| 92 |
|
| 93 |
## Header
|
| 94 |
|
|
@@ -155,13 +169,8 @@ with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
|
|
| 155 |
"""
|
| 156 |
)
|
| 157 |
lb = get_leaderboard_object()
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
print(f"TMP Refreshing overall leaderboard at {get_time()}. Data version: {data_version.value}")
|
| 161 |
-
return format_leaderboard_table(df_results=current_dataframe)
|
| 162 |
-
|
| 163 |
-
# Refresh when data version changes
|
| 164 |
-
data_version.change(fn=refresh_overall_leaderboard, outputs=lb)
|
| 165 |
|
| 166 |
# At the bottom of the leaderboard, we can keep as NaN and explain missing test set results
|
| 167 |
# gr.Markdown(
|
|
@@ -306,4 +315,4 @@ with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
|
|
| 306 |
)
|
| 307 |
|
| 308 |
if __name__ == "__main__":
|
| 309 |
-
demo.launch(ssr_mode=False, share=
|
|
|
|
| 3 |
import gradio as gr
|
| 4 |
from gradio.themes.utils import sizes
|
| 5 |
from gradio_leaderboard import Leaderboard
|
| 6 |
+
from dotenv import load_dotenv
|
| 7 |
+
import contextlib
|
| 8 |
+
load_dotenv() # Load environment variables from .env file
|
| 9 |
|
| 10 |
from about import ABOUT_INTRO, ABOUT_TEXT, FAQS, SUBMIT_INTRUCTIONS
|
| 11 |
from constants import (
|
|
|
|
| 52 |
# TODO how to sort filter columns alphabetically?
|
| 53 |
# Bug: Can't leave search_columns empty because then it says "Column None not found in headers"
|
| 54 |
# Note(Lood): Would be nice to make it clear that the Search Column is searching on model name
|
| 55 |
+
current_dataframe = pd.read_csv("debug-current-results.csv")
|
| 56 |
lb = Leaderboard(
|
| 57 |
value=format_leaderboard_table(df_results=current_dataframe, assay=assay),
|
| 58 |
datatype=["str", "str", "str", "number", "str"],
|
|
|
|
| 68 |
|
| 69 |
|
| 70 |
# Initialize global dataframe
|
| 71 |
+
fetch_hf_results()
|
| 72 |
+
current_dataframe = pd.read_csv("debug-current-results.csv")
|
| 73 |
+
|
| 74 |
+
def refresh_overall_leaderboard():
|
| 75 |
+
current_dataframe = pd.read_csv("debug-current-results.csv")
|
| 76 |
+
return format_leaderboard_table(df_results=current_dataframe)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def fetch_latest_data():
|
| 80 |
+
import time
|
| 81 |
+
while True:
|
| 82 |
+
try:
|
| 83 |
+
fetch_hf_results()
|
| 84 |
+
except Exception as e:
|
| 85 |
+
print(f"Error fetching latest data: {e}")
|
| 86 |
+
time.sleep(3) # Fetch every 3 seconds
|
| 87 |
+
print("Exiting data fetch thread")
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
@contextlib.asynccontextmanager
|
| 91 |
+
async def periodic_data_fetch(app):
|
| 92 |
+
import threading
|
| 93 |
+
t = threading.Thread(target=fetch_latest_data, daemon=True)
|
| 94 |
+
t.start()
|
| 95 |
+
yield
|
| 96 |
+
t.join(3)
|
| 97 |
+
|
| 98 |
|
| 99 |
# Lood: Two problems currently:
|
| 100 |
# 1. The data_version state value isn't being incremented, it seems (even though it's triggering the dataframe change correctly)
|
|
|
|
| 103 |
# Make font size bigger using gradio theme
|
| 104 |
with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
|
| 105 |
timer = gr.Timer(3) # Run every 3 seconds when page is focused
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
## Header
|
| 108 |
|
|
|
|
| 169 |
"""
|
| 170 |
)
|
| 171 |
lb = get_leaderboard_object()
|
| 172 |
+
timer.tick(fn=refresh_overall_leaderboard, outputs=lb)
|
| 173 |
+
demo.load(fn=refresh_overall_leaderboard, outputs=lb)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
|
| 175 |
# At the bottom of the leaderboard, we can keep as NaN and explain missing test set results
|
| 176 |
# gr.Markdown(
|
|
|
|
| 315 |
)
|
| 316 |
|
| 317 |
if __name__ == "__main__":
|
| 318 |
+
demo.launch(ssr_mode=False, share=False, app_kwargs={"lifespan": periodic_data_fetch})
|
constants.py
CHANGED
|
@@ -68,7 +68,7 @@ API = HfApi(token=TOKEN)
|
|
| 68 |
# Huggingface repos
|
| 69 |
ORGANIZATION = "ginkgo-datapoints"
|
| 70 |
SUBMISSIONS_REPO = f"{ORGANIZATION}/abdev-bench-submissions"
|
| 71 |
-
RESULTS_REPO = f"{ORGANIZATION}/abdev-bench-results"
|
| 72 |
|
| 73 |
# Leaderboard dataframes
|
| 74 |
LEADERBOARD_RESULTS_COLUMNS = [
|
|
|
|
| 68 |
# Huggingface repos
|
| 69 |
ORGANIZATION = "ginkgo-datapoints"
|
| 70 |
SUBMISSIONS_REPO = f"{ORGANIZATION}/abdev-bench-submissions"
|
| 71 |
+
RESULTS_REPO = f"{ORGANIZATION}/abdev-bench-results-test"
|
| 72 |
|
| 73 |
# Leaderboard dataframes
|
| 74 |
LEADERBOARD_RESULTS_COLUMNS = [
|
utils.py
CHANGED
|
@@ -30,8 +30,9 @@ def fetch_hf_results():
|
|
| 30 |
# load_dataset should cache by default if not using force_redownload
|
| 31 |
df = load_dataset(
|
| 32 |
RESULTS_REPO,
|
| 33 |
-
data_files="
|
| 34 |
)["train"].to_pandas()
|
|
|
|
| 35 |
assert all(
|
| 36 |
col in df.columns for col in LEADERBOARD_RESULTS_COLUMNS
|
| 37 |
), f"Expected columns {LEADERBOARD_RESULTS_COLUMNS} not found in {df.columns}. Missing columns: {set(LEADERBOARD_RESULTS_COLUMNS) - set(df.columns)}"
|
|
@@ -55,8 +56,8 @@ def fetch_hf_results():
|
|
| 55 |
# Note: Could optionally add a column "is_baseline" to the dataframe to indicate whether the model is a baseline model or not. If things get crowded.
|
| 56 |
# Anonymize the user column at this point (so note: users can submit anonymous / non-anonymous and we'll show their latest submission regardless)
|
| 57 |
df.loc[df["anonymous"] != False, "user"] = "anon-" + df.loc[df["anonymous"] != False, "user"].apply(readable_hash)
|
| 58 |
-
|
| 59 |
-
|
| 60 |
|
| 61 |
|
| 62 |
# Readable hashing function similar to coolname or codenamize
|
|
|
|
| 30 |
# load_dataset should cache by default if not using force_redownload
|
| 31 |
df = load_dataset(
|
| 32 |
RESULTS_REPO,
|
| 33 |
+
data_files="data/train-00000-of-00001.parquet",
|
| 34 |
)["train"].to_pandas()
|
| 35 |
+
print("fetched results from HF", df.shape)
|
| 36 |
assert all(
|
| 37 |
col in df.columns for col in LEADERBOARD_RESULTS_COLUMNS
|
| 38 |
), f"Expected columns {LEADERBOARD_RESULTS_COLUMNS} not found in {df.columns}. Missing columns: {set(LEADERBOARD_RESULTS_COLUMNS) - set(df.columns)}"
|
|
|
|
| 56 |
# Note: Could optionally add a column "is_baseline" to the dataframe to indicate whether the model is a baseline model or not. If things get crowded.
|
| 57 |
# Anonymize the user column at this point (so note: users can submit anonymous / non-anonymous and we'll show their latest submission regardless)
|
| 58 |
df.loc[df["anonymous"] != False, "user"] = "anon-" + df.loc[df["anonymous"] != False, "user"].apply(readable_hash)
|
| 59 |
+
print("after filtering to latest submissions only", df.shape)
|
| 60 |
+
df.to_csv("debug-current-results.csv", index=False)
|
| 61 |
|
| 62 |
|
| 63 |
# Readable hashing function similar to coolname or codenamize
|