use BackgroundScheduler to restart space
app.py (CHANGED)
@@ -16,6 +16,11 @@ H4_TOKEN = os.environ.get("H4_TOKEN", None)
 LMEH_REPO = "HuggingFaceH4/lmeh_evaluations"
 IS_PUBLIC = bool(os.environ.get("IS_PUBLIC", None))
 
+api = HfApi(token=H4_TOKEN)
+
+def restart_space():
+    api.restart_space(repo_id="HuggingFaceH4/open_llm_leaderboard")
+
 
 def get_all_requested_models(requested_models_dir):
     depth = 1
@@ -142,18 +147,18 @@ def get_leaderboard():
     all_data.append(gpt35_values)
 
     base_line = {
-
-
-
-
-
-
-
-
-
-
+        "Model": "<p>Baseline</p>",
+        "Revision": "N/A",
+        "8bit": None,
+        "Average ⬆️": 25.0,
+        "ARC (25-shot) ⬆️": 25.0,
+        "HellaSwag (10-shot) ⬆️": 25.0,
+        "MMLU (5-shot) ⬆️": 25.0,
+        "TruthfulQA (0-shot) ⬆️": 25.0,
+    }
+
     all_data.append(base_line)
-
+
     df = pd.DataFrame.from_records(all_data)
     df = df.sort_values(by=["Average ⬆️"], ascending=False)
     df = df[COLS]
@@ -287,7 +292,7 @@ def add_new_eval(
         f.write(json.dumps(eval_entry))
     LMEH_REPO = "HuggingFaceH4/lmeh_evaluations"
 
-    api = HfApi()
+    # api = HfApi()
     api.upload_file(
         path_or_fileobj=out_path,
         path_in_repo=out_path,
@@ -306,6 +311,7 @@ def refresh():
         get_leaderboard(), get_eval_table()
     return leaderboard, finished_eval_queue, running_eval_queue, pending_eval_queue
 
+
 custom_css = """
 #changelog-text {
     font-size: 18px !important;
@@ -331,8 +337,8 @@ We chose these benchmarks as they test a variety of reasoning and general knowle
     )
 
     with gr.Accordion("CHANGELOG", open=False):
-        changelog = gr.Markdown(CHANGELOG_TEXT,elem_id="changelog-text")
-
+        changelog = gr.Markdown(CHANGELOG_TEXT, elem_id="changelog-text")
+
     with gr.Row():
         leaderboard_table = gr.components.Dataframe(
             value=leaderboard, headers=COLS, datatype=TYPES, max_rows=5
@@ -415,4 +421,19 @@ We chose these benchmarks as they test a variety of reasoning and general knowle
         ],
         submission_result,
     )
+
+    # demo.load(
+    #     refresh,
+    #     inputs=[],
+    #     outputs=[
+    #         leaderboard_table,
+    #         finished_eval_table,
+    #         running_eval_table,
+    #         pending_eval_table,
+    #     ],
+    # )
+
+scheduler = BackgroundScheduler()
+scheduler.add_job(restart_space, 'interval', seconds=3600)
+scheduler.start()
 demo.launch()
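For context, here is a minimal standalone sketch of the scheduled-restart pattern this commit introduces. It assumes `apscheduler` and `huggingface_hub` are installed and that the token stored in the H4_TOKEN environment variable can manage the Space; the repo id and the one-hour interval mirror the diff above, everything else is illustrative.

# Sketch of the hourly self-restart added in this commit (assumptions noted above).
import os

from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi

H4_TOKEN = os.environ.get("H4_TOKEN", None)
api = HfApi(token=H4_TOKEN)

def restart_space():
    # Restart the Space via the Hub API; requires a token with write access.
    api.restart_space(repo_id="HuggingFaceH4/open_llm_leaderboard")

scheduler = BackgroundScheduler()                            # jobs run in a daemon thread
scheduler.add_job(restart_space, "interval", seconds=3600)   # fire once per hour
scheduler.start()

# ... build and launch the Gradio app here; the scheduler keeps ticking
# in the background until the process exits.

BackgroundScheduler runs its jobs off the main thread, so it does not block the subsequent demo.launch() call, whereas a blocking scheduler would never reach it. The hourly restart forces the Space to reinitialize, which presumably also explains why the demo.load refresh hook is commented out in the same commit.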