abdev-leaderboard

Running

App Files Files Community

Freddy Boulton committed on Sep 19

Commit

0e24f43

1 Parent(s): 1f0969f

my version

Browse files

Files changed (3) hide show

app.py +34 -25
constants.py +1 -1
utils.py +4 -3

app.py CHANGED Viewed

@@ -3,6 +3,9 @@ import pandas as pd
 import gradio as gr
 from gradio.themes.utils import sizes
 from gradio_leaderboard import Leaderboard
 from about import ABOUT_INTRO, ABOUT_TEXT, FAQS, SUBMIT_INTRUCTIONS
 from constants import (
@@ -49,6 +52,7 @@ def get_leaderboard_object(assay: str | None = None):
     # TODO how to sort filter columns alphabetically?
     # Bug: Can't leave search_columns empty because then it says "Column None not found in headers"
     # Note(Lood): Would be nice to make it clear that the Search Column is searching on model name
     lb = Leaderboard(
         value=format_leaderboard_table(df_results=current_dataframe, assay=assay),
         datatype=["str", "str", "str", "number", "str"],
@@ -64,7 +68,33 @@ def get_leaderboard_object(assay: str | None = None):
 # Initialize global dataframe
-current_dataframe = fetch_hf_results()
 # Lood: Two problems currently:
 # 1. The data_version state value isn't being incremented, it seems (even though it's triggering the dataframe change correctly)
@@ -73,22 +103,6 @@ current_dataframe = fetch_hf_results()
 # Make font size bigger using gradio theme
 with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
     timer = gr.Timer(3)  # Run every 3 seconds when page is focused
-    data_version = gr.State(value=0)  # Track data changes
-    def update_current_dataframe():
-        global current_dataframe
-        new_dataframe = fetch_hf_results()
-        new_hash = hashlib.sha256(pd.util.hash_pandas_object(new_dataframe).values).hexdigest()
-        # Check if data has actually changed
-        if new_hash != data_version.value:
-            print(f"TMP Dataframe has changed at {get_time()}. Old hash: {str(data_version.value)[:8]}, new hash: {str(new_hash)[:8]}")
-            current_dataframe = new_dataframe
-            data_version.value = new_hash # Increment version to trigger updates
-            return new_hash
-        return data_version.value
-    timer.tick(fn=update_current_dataframe, outputs=data_version)
     ## Header
@@ -155,13 +169,8 @@ with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
                 """
             )
             lb = get_leaderboard_object()
-            def refresh_overall_leaderboard():
-                print(f"TMP Refreshing overall leaderboard at {get_time()}. Data version: {data_version.value}")
-                return format_leaderboard_table(df_results=current_dataframe)
-            # Refresh when data version changes
-            data_version.change(fn=refresh_overall_leaderboard, outputs=lb)
             # At the bottom of the leaderboard, we can keep as NaN and explain missing test set results
             # gr.Markdown(
@@ -306,4 +315,4 @@ with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
     )
 if __name__ == "__main__":
-    demo.launch(ssr_mode=False, share=True)

 import gradio as gr
 from gradio.themes.utils import sizes
 from gradio_leaderboard import Leaderboard
+from dotenv import load_dotenv
+import contextlib
+load_dotenv()  # Load environment variables from .env file
 from about import ABOUT_INTRO, ABOUT_TEXT, FAQS, SUBMIT_INTRUCTIONS
 from constants import (
     # TODO how to sort filter columns alphabetically?
     # Bug: Can't leave search_columns empty because then it says "Column None not found in headers"
     # Note(Lood): Would be nice to make it clear that the Search Column is searching on model name
+    current_dataframe = pd.read_csv("debug-current-results.csv")
     lb = Leaderboard(
         value=format_leaderboard_table(df_results=current_dataframe, assay=assay),
         datatype=["str", "str", "str", "number", "str"],
 # Initialize global dataframe
+fetch_hf_results()
+current_dataframe = pd.read_csv("debug-current-results.csv")
+def refresh_overall_leaderboard():
+    current_dataframe = pd.read_csv("debug-current-results.csv")
+    return format_leaderboard_table(df_results=current_dataframe)
+def fetch_latest_data():
+    import time
+    while True:
+        try:
+            fetch_hf_results()
+        except Exception as e:
+            print(f"Error fetching latest data: {e}")
+        time.sleep(3)  # Fetch every 3 seconds
+    print("Exiting data fetch thread")
+@contextlib.asynccontextmanager
+async def periodic_data_fetch(app):
+    import threading
+    t = threading.Thread(target=fetch_latest_data, daemon=True)
+    t.start()
+    yield
+    t.join(3)
 # Lood: Two problems currently:
 # 1. The data_version state value isn't being incremented, it seems (even though it's triggering the dataframe change correctly)
 # Make font size bigger using gradio theme
 with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
     timer = gr.Timer(3)  # Run every 3 seconds when page is focused
     ## Header
                 """
             )
             lb = get_leaderboard_object()
+            timer.tick(fn=refresh_overall_leaderboard, outputs=lb)
+            demo.load(fn=refresh_overall_leaderboard, outputs=lb)
             # At the bottom of the leaderboard, we can keep as NaN and explain missing test set results
             # gr.Markdown(
     )
 if __name__ == "__main__":
+    demo.launch(ssr_mode=False, share=False, app_kwargs={"lifespan": periodic_data_fetch})

constants.py CHANGED Viewed

@@ -68,7 +68,7 @@ API = HfApi(token=TOKEN)
 # Huggingface repos
 ORGANIZATION = "ginkgo-datapoints"
 SUBMISSIONS_REPO = f"{ORGANIZATION}/abdev-bench-submissions"
-RESULTS_REPO = f"{ORGANIZATION}/abdev-bench-results"
 # Leaderboard dataframes
 LEADERBOARD_RESULTS_COLUMNS = [

 # Huggingface repos
 ORGANIZATION = "ginkgo-datapoints"
 SUBMISSIONS_REPO = f"{ORGANIZATION}/abdev-bench-submissions"
+RESULTS_REPO = f"{ORGANIZATION}/abdev-bench-results-test"
 # Leaderboard dataframes
 LEADERBOARD_RESULTS_COLUMNS = [

utils.py CHANGED Viewed

@@ -30,8 +30,9 @@ def fetch_hf_results():
     # load_dataset should cache by default if not using force_redownload
     df = load_dataset(
         RESULTS_REPO,
-        data_files="auto_submissions/metrics_all.csv",
     )["train"].to_pandas()
     assert all(
         col in df.columns for col in LEADERBOARD_RESULTS_COLUMNS
     ), f"Expected columns {LEADERBOARD_RESULTS_COLUMNS} not found in {df.columns}. Missing columns: {set(LEADERBOARD_RESULTS_COLUMNS) - set(df.columns)}"
@@ -55,8 +56,8 @@ def fetch_hf_results():
     # Note: Could optionally add a column "is_baseline" to the dataframe to indicate whether the model is a baseline model or not. If things get crowded.
     # Anonymize the user column at this point (so note: users can submit anonymous / non-anonymous and we'll show their latest submission regardless)
     df.loc[df["anonymous"] != False, "user"] = "anon-" + df.loc[df["anonymous"] != False, "user"].apply(readable_hash)
-    return df
 # Readable hashing function similar to coolname or codenamize

     # load_dataset should cache by default if not using force_redownload
     df = load_dataset(
         RESULTS_REPO,
+        data_files="data/train-00000-of-00001.parquet",
     )["train"].to_pandas()
+    print("fetched results from HF", df.shape)
     assert all(
         col in df.columns for col in LEADERBOARD_RESULTS_COLUMNS
     ), f"Expected columns {LEADERBOARD_RESULTS_COLUMNS} not found in {df.columns}. Missing columns: {set(LEADERBOARD_RESULTS_COLUMNS) - set(df.columns)}"
     # Note: Could optionally add a column "is_baseline" to the dataframe to indicate whether the model is a baseline model or not. If things get crowded.
     # Anonymize the user column at this point (so note: users can submit anonymous / non-anonymous and we'll show their latest submission regardless)
     df.loc[df["anonymous"] != False, "user"] = "anon-" + df.loc[df["anonymous"] != False, "user"].apply(readable_hash)
+    print("after filtering to latest submissions only", df.shape)
+    df.to_csv("debug-current-results.csv", index=False)
 # Readable hashing function similar to coolname or codenamize