chore: Update app.py to include select_columns and hide_columns in init_leaderboard function
Files changed:
- app.py (+23 / -7)
- src/about.py (+2 / -2)
- src/display/utils.py (+9 / -3)
- src/envs.py (+5 / -3)
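The commit title mentions passing select_columns and hide_columns into an init_leaderboard function, but that hunk is not part of the excerpt below. As a rough sketch only, assuming the gradio_leaderboard package used by the standard Hugging Face demo-leaderboard template and the ColumnContent fields defined in src/display/utils.py, such a function usually looks like this:

# Sketch, not the code from this commit: gradio_leaderboard and the
# displayed_by_default field name are assumptions from the standard template.
from gradio_leaderboard import Leaderboard, SelectColumns

from src.display.utils import AutoEvalColumn, fields


def init_leaderboard(dataframe):
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")
    return Leaderboard(
        value=dataframe,
        datatype=[c.type for c in fields(AutoEvalColumn)],
        # select_columns controls which columns users can toggle on and off;
        # never_hidden columns (e.g. "Model Name") cannot be deselected.
        select_columns=SelectColumns(
            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
            label="Select Columns to Display:",
        ),
        # hide_columns drops columns flagged hidden=True from the view entirely.
        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
        interactive=False,
    )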
app.py (CHANGED)

@@ -19,30 +19,46 @@ from src.display.utils import (
     EVAL_COLS,
     EVAL_TYPES,
     AutoEvalColumn,
-    ModelType,
     fields,
-    WeightType,
-    Precision
 )
-from src.envs import …
+from src.envs import (
+    API,
+    EVAL_DETAILED_RESULTS_PATH,
+    EVAL_RESULTS_PATH,
+    EVAL_DETAILED_RESULTS_REPO,
+    REPO_ID,
+    RESULTS_REPO,
+    TOKEN,
+)
 from src.populate import get_leaderboard_df


 def restart_space():
     API.restart_space(repo_id=REPO_ID)

+
 ### Space initialisation
 try:
     print(EVAL_DETAILED_RESULTS_REPO)
     snapshot_download(
-        repo_id=EVAL_DETAILED_RESULTS_REPO, …
+        repo_id=EVAL_DETAILED_RESULTS_REPO,
+        local_dir=EVAL_DETAILED_RESULTS_PATH,
+        repo_type="dataset",
+        tqdm_class=None,
+        etag_timeout=30,
+        token=TOKEN,
     )
 except Exception:
     restart_space()
 try:
     print(EVAL_RESULTS_PATH)
     snapshot_download(
-        repo_id=RESULTS_REPO, …
+        repo_id=RESULTS_REPO,
+        local_dir=EVAL_RESULTS_PATH,
+        repo_type="dataset",
+        tqdm_class=None,
+        etag_timeout=30,
+        token=TOKEN,
     )
 except Exception:
     restart_space()
@@ -96,4 +112,4 @@ with demo:
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=1800)
 scheduler.start()
-demo.queue(default_concurrency_limit=40).launch()
+demo.queue(default_concurrency_limit=40).launch()
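The two snapshot_download calls above mirror the results datasets into the Space's local cache; if either download fails, the Space restarts itself. A standalone sketch of the same call, useful for checking dataset access before deploying, might look like this (the local_dir value is illustrative; the real EVAL_RESULTS_PATH is defined in src/envs.py):

# Sketch only: assumes huggingface_hub is installed and HF_TOKEN grants read
# access to the results dataset.
import os

from huggingface_hub import snapshot_download

local_path = snapshot_download(
    repo_id="lmms-lab/LiveBenchResults",  # RESULTS_REPO in src/envs.py
    local_dir="./eval-results",           # illustrative stand-in for EVAL_RESULTS_PATH
    repo_type="dataset",
    etag_timeout=30,
    token=os.environ.get("HF_TOKEN"),
)
print("snapshot downloaded to", local_path)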
src/about.py (CHANGED)

@@ -1,6 +1,7 @@
 from dataclasses import dataclass
 from enum import Enum

+
 @dataclass
 class Task:
     benchmark: str
@@ -16,11 +17,10 @@ class Tasks(Enum):
     further_insights = Task("Further Insights", "acc", "Further Insights")


-NUM_FEWSHOT = 0
+NUM_FEWSHOT = 0  # Change with your few shot
 # ---------------------------------------------------


-
 # Your leaderboard name
 TITLE = """<h1 align="center" id="space-title">LiveBench</h1>"""

src/display/utils.py (CHANGED)

@@ -5,6 +5,7 @@ import pandas as pd

 from src.about import Tasks

+
 def fields(raw_class):
     return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]

@@ -20,12 +21,15 @@ class ColumnContent:
     hidden: bool = False
     never_hidden: bool = False

+
 ## Leaderboard columns
 auto_eval_column_dict = []
 # Init
 # auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
-auto_eval_column_dict.append(
-    …
+auto_eval_column_dict.append(
+    ["model", ColumnContent, ColumnContent("Model Name", "markdown", True, never_hidden=True)]
+)
+# Scores
 auto_eval_column_dict.append(["Overall", ColumnContent, ColumnContent("Total", "number", True)])
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
@@ -33,6 +37,7 @@ for task in Tasks:
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

+
 ## For the queue columns in the submission tab
 @dataclass(frozen=True)
 class EvalQueueColumn:  # Queue column
@@ -43,12 +48,13 @@ class EvalQueueColumn:  # Queue column
     weight_type = ColumnContent("weight_type", "str", "Original")
     status = ColumnContent("status", "str", True)

+
 ## All the model information that we might need
 @dataclass
 class ModelDetails:
     name: str
     display_name: str = ""
-    symbol: str = ""
+    symbol: str = ""  # emoji

 # Column selection
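Because AutoEvalColumn is produced with make_dataclass, the rest of the template typically derives its column lists from it through fields(). A minimal sketch (the COLS and HIDDEN names here are illustrative, not from this commit):

# Illustrative only: shows how the dynamically built AutoEvalColumn is usually
# consumed elsewhere in the template; the list names are hypothetical.
from src.display.utils import AutoEvalColumn, fields

COLS = [c.name for c in fields(AutoEvalColumn)]                # "Model Name", "Total", one per Task
HIDDEN = [c.name for c in fields(AutoEvalColumn) if c.hidden]  # columns kept out of the leaderboard view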
src/envs.py (CHANGED)

@@ -4,9 +4,11 @@ from huggingface_hub import HfApi

 # Info to change for your repository
 # ----------------------------------
-TOKEN = os.environ.get("HF_TOKEN")
+TOKEN = os.environ.get("HF_TOKEN")  # A read/write token for your org

-OWNER = …
+OWNER = (
+    "lmms-lab"  # Change to your org - don't forget to create a results and request dataset, with the correct format!
+)
 # ----------------------------------

 REPO_ID = f"{OWNER}/leaderboard"
@@ -14,7 +16,7 @@ EVAL_DETAILED_RESULTS_REPO = f"{OWNER}/LiveBenchDetailedResults"
 RESULTS_REPO = f"{OWNER}/LiveBenchResults"

 # If you setup a cache later, just change HF_HOME
-CACHE_PATH=os.getenv("HF_HOME", ".")
+CACHE_PATH = os.getenv("HF_HOME", ".")

 # Local caches
 EVAL_DETAILED_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-detailed-results")
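With OWNER set to "lmms-lab" and no HF_HOME override, the derived names in src/envs.py resolve as sketched below (values are implied by the diff above, not taken from a live run):

import os

OWNER = "lmms-lab"
CACHE_PATH = os.getenv("HF_HOME", ".")

REPO_ID = f"{OWNER}/leaderboard"                                  # "lmms-lab/leaderboard"
EVAL_DETAILED_RESULTS_REPO = f"{OWNER}/LiveBenchDetailedResults"  # "lmms-lab/LiveBenchDetailedResults"
RESULTS_REPO = f"{OWNER}/LiveBenchResults"                        # "lmms-lab/LiveBenchResults"
EVAL_DETAILED_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-detailed-results")  # "./eval-detailed-results"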