loodvanniekerkginkgo committed on
Commit
211c032
·
verified ·
1 Parent(s): 1f0969f
Files changed (2) hide show
  1. app.py +36 -25
  2. utils.py +2 -2
app.py CHANGED
@@ -3,6 +3,9 @@ import pandas as pd
3
  import gradio as gr
4
  from gradio.themes.utils import sizes
5
  from gradio_leaderboard import Leaderboard
 
 
 
6
 
7
  from about import ABOUT_INTRO, ABOUT_TEXT, FAQS, SUBMIT_INTRUCTIONS
8
  from constants import (
@@ -49,6 +52,7 @@ def get_leaderboard_object(assay: str | None = None):
49
  # TODO how to sort filter columns alphabetically?
50
  # Bug: Can't leave search_columns empty because then it says "Column None not found in headers"
51
  # Note(Lood): Would be nice to make it clear that the Search Column is searching on model name
 
52
  lb = Leaderboard(
53
  value=format_leaderboard_table(df_results=current_dataframe, assay=assay),
54
  datatype=["str", "str", "str", "number", "str"],
@@ -64,7 +68,35 @@ def get_leaderboard_object(assay: str | None = None):
64
 
65
 
66
  # Initialize global dataframe
67
- current_dataframe = fetch_hf_results()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
  # Lood: Two problems currently:
70
  # 1. The data_version state value isn't being incremented, it seems (even though it's triggering the dataframe change correctly)
@@ -73,22 +105,6 @@ current_dataframe = fetch_hf_results()
73
  # Make font size bigger using gradio theme
74
  with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
75
  timer = gr.Timer(3) # Run every 3 seconds when page is focused
76
- data_version = gr.State(value=0) # Track data changes
77
-
78
- def update_current_dataframe():
79
- global current_dataframe
80
- new_dataframe = fetch_hf_results()
81
- new_hash = hashlib.sha256(pd.util.hash_pandas_object(new_dataframe).values).hexdigest()
82
-
83
- # Check if data has actually changed
84
- if new_hash != data_version.value:
85
- print(f"TMP Dataframe has changed at {get_time()}. Old hash: {str(data_version.value)[:8]}, new hash: {str(new_hash)[:8]}")
86
- current_dataframe = new_dataframe
87
- data_version.value = new_hash # Increment version to trigger updates
88
- return new_hash
89
- return data_version.value
90
-
91
- timer.tick(fn=update_current_dataframe, outputs=data_version)
92
 
93
  ## Header
94
 
@@ -155,13 +171,8 @@ with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
155
  """
156
  )
157
  lb = get_leaderboard_object()
158
-
159
- def refresh_overall_leaderboard():
160
- print(f"TMP Refreshing overall leaderboard at {get_time()}. Data version: {data_version.value}")
161
- return format_leaderboard_table(df_results=current_dataframe)
162
-
163
- # Refresh when data version changes
164
- data_version.change(fn=refresh_overall_leaderboard, outputs=lb)
165
 
166
  # At the bottom of the leaderboard, we can keep as NaN and explain missing test set results
167
  # gr.Markdown(
@@ -306,4 +317,4 @@ with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
306
  )
307
 
308
  if __name__ == "__main__":
309
- demo.launch(ssr_mode=False, share=True)
 
3
  import gradio as gr
4
  from gradio.themes.utils import sizes
5
  from gradio_leaderboard import Leaderboard
6
+ from dotenv import load_dotenv
7
+ import contextlib
8
+ load_dotenv() # Load environment variables from .env file
9
 
10
  from about import ABOUT_INTRO, ABOUT_TEXT, FAQS, SUBMIT_INTRUCTIONS
11
  from constants import (
 
52
  # TODO how to sort filter columns alphabetically?
53
  # Bug: Can't leave search_columns empty because then it says "Column None not found in headers"
54
  # Note(Lood): Would be nice to make it clear that the Search Column is searching on model name
55
+ current_dataframe = pd.read_csv("debug-current-results.csv")
56
  lb = Leaderboard(
57
  value=format_leaderboard_table(df_results=current_dataframe, assay=assay),
58
  datatype=["str", "str", "str", "number", "str"],
 
68
 
69
 
70
  # Initialize global dataframe
71
+ fetch_hf_results()
72
+ current_dataframe = pd.read_csv("debug-current-results.csv")
73
+
74
+ def refresh_overall_leaderboard():
75
+ current_dataframe = pd.read_csv("debug-current-results.csv")
76
+ return format_leaderboard_table(df_results=current_dataframe)
77
+
78
+
79
+ def fetch_latest_data(stop_event):
80
+ import time
81
+ while not stop_event.is_set():
82
+ try:
83
+ fetch_hf_results()
84
+ except Exception as e:
85
+ print(f"Error fetching latest data: {e}")
86
+ time.sleep(3) # Fetch every 3 seconds
87
+ print("Exiting data fetch thread")
88
+
89
+
90
+ @contextlib.asynccontextmanager
91
+ async def periodic_data_fetch(app):
92
+ import threading
93
+ event = threading.Event()
94
+ t = threading.Thread(target=fetch_latest_data, args=(event,), daemon=True)
95
+ t.start()
96
+ yield
97
+ event.set()
98
+ t.join(3)
99
+
100
 
101
  # Lood: Two problems currently:
102
  # 1. The data_version state value isn't being incremented, it seems (even though it's triggering the dataframe change correctly)
 
105
  # Make font size bigger using gradio theme
106
  with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
107
  timer = gr.Timer(3) # Run every 3 seconds when page is focused
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
  ## Header
110
 
 
171
  """
172
  )
173
  lb = get_leaderboard_object()
174
+ timer.tick(fn=refresh_overall_leaderboard, outputs=lb)
175
+ demo.load(fn=refresh_overall_leaderboard, outputs=lb)
 
 
 
 
 
176
 
177
  # At the bottom of the leaderboard, we can keep as NaN and explain missing test set results
178
  # gr.Markdown(
 
317
  )
318
 
319
  if __name__ == "__main__":
320
+ demo.launch(ssr_mode=False, share=False, app_kwargs={"lifespan": periodic_data_fetch})
utils.py CHANGED
@@ -32,6 +32,7 @@ def fetch_hf_results():
32
  RESULTS_REPO,
33
  data_files="auto_submissions/metrics_all.csv",
34
  )["train"].to_pandas()
 
35
  assert all(
36
  col in df.columns for col in LEADERBOARD_RESULTS_COLUMNS
37
  ), f"Expected columns {LEADERBOARD_RESULTS_COLUMNS} not found in {df.columns}. Missing columns: {set(LEADERBOARD_RESULTS_COLUMNS) - set(df.columns)}"
@@ -55,8 +56,7 @@ def fetch_hf_results():
55
  # Note: Could optionally add a column "is_baseline" to the dataframe to indicate whether the model is a baseline model or not. If things get crowded.
56
  # Anonymize the user column at this point (so note: users can submit anonymous / non-anonymous and we'll show their latest submission regardless)
57
  df.loc[df["anonymous"] != False, "user"] = "anon-" + df.loc[df["anonymous"] != False, "user"].apply(readable_hash)
58
-
59
- return df
60
 
61
 
62
  # Readable hashing function similar to coolname or codenamize
 
32
  RESULTS_REPO,
33
  data_files="auto_submissions/metrics_all.csv",
34
  )["train"].to_pandas()
35
+ print("fetched results from HF", df.shape)
36
  assert all(
37
  col in df.columns for col in LEADERBOARD_RESULTS_COLUMNS
38
  ), f"Expected columns {LEADERBOARD_RESULTS_COLUMNS} not found in {df.columns}. Missing columns: {set(LEADERBOARD_RESULTS_COLUMNS) - set(df.columns)}"
 
56
  # Note: Could optionally add a column "is_baseline" to the dataframe to indicate whether the model is a baseline model or not. If things get crowded.
57
  # Anonymize the user column at this point (so note: users can submit anonymous / non-anonymous and we'll show their latest submission regardless)
58
  df.loc[df["anonymous"] != False, "user"] = "anon-" + df.loc[df["anonymous"] != False, "user"].apply(readable_hash)
59
+ df.to_csv("debug-current-results.csv", index=False)
 
60
 
61
 
62
  # Readable hashing function similar to coolname or codenamize