Fix UI loading

- app.py +4 -4
- src/display/utils.py +9 -7
- src/leaderboard/read_evals.py +29 -27
- src/submission/submit.py +1 -1

app.py
CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
 from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
 import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
-from huggingface_hub import snapshot_download
+#from huggingface_hub import snapshot_download
 
 from src.about import (
     CITATION_BUTTON_LABEL,
@@ -19,10 +19,10 @@ from src.display.utils import (
     EVAL_COLS,
     EVAL_TYPES,
     AutoEvalColumn,
-    ModelType,
+    # ModelType,
     fields,
-    WeightType,
-    Precision
+    #WeightType,
+    #Precision
 )
 from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
 from src.populate import get_evaluation_queue_df, get_leaderboard_df

src/display/utils.py
CHANGED
@@ -31,14 +31,14 @@ for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
 # Model information
 auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
-auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
-auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
-auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
+#auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
+#auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
+#auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
 auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
 auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
 auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
-auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
-auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
+#auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
+#auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
 
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
@@ -60,8 +60,10 @@ class ModelDetails:
     display_name: str = ""
     symbol: str = "" # emoji
 
-
+"""
 class ModelType(Enum):
+
+
     PT = ModelDetails(name="pretrained", symbol="🟢")
     FT = ModelDetails(name="fine-tuned", symbol="🔶")
     IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
@@ -99,7 +101,7 @@ class Precision(Enum):
         if precision in ["torch.bfloat16", "bfloat16"]:
             return Precision.bfloat16
         return Precision.Unknown
-
+"""
 # Column selection
 COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
 

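Aside: the second and third hunks above disable the whole ModelType/WeightType/Precision region by opening a module-level triple-quoted string just before class ModelType(Enum): and closing it after Precision.from_str. A minimal sketch of why this works, using a hypothetical Disabled class that is not from this repo: a bare string literal at module level is evaluated and thrown away, so nothing inside it is ever defined.

# sketch.py — a string literal as a quick block "comment" (illustrative only)
"""
class Disabled:
    PT = "pretrained"
"""

try:
    Disabled  # noqa: F821 — never created; the quoted block did not run
except NameError:
    print("Disabled is not defined: the quoted block never executed")

Unlike # comments, this trick requires the quoted region to contain no other unescaped triple quotes, and docstring-scanning tools may still pick the block up.
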
src/leaderboard/read_evals.py
CHANGED
@@ -8,7 +8,7 @@ import dateutil
 import numpy as np
 
 from src.display.formatting import make_clickable_model
-from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
+from src.display.utils import AutoEvalColumn, Tasks#, ModelType, Precision, WeightType
 from src.submission.check_validity import is_model_on_hub
 
 
@@ -20,12 +20,13 @@ class EvalResult:
     full_model: str # org/model (path on hub)
     org: str
     model: str
-    revision: str # commit hash, "" if main
+    #revision: str # commit hash, "" if main
     results: dict
-    precision: Precision = Precision.Unknown
-    model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
-    weight_type: WeightType = WeightType.Original # Original or Adapter
-    architecture: str = "Unknown"
+    #precision: Precision = Precision.Unknown
+    #model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
+    model_source: str = "" # HF, private, ...
+    #weight_type: WeightType = WeightType.Original # Original or Adapter
+    #architecture: str = "Unknown"
     license: str = "?"
     likes: int = 0
     num_params: int = 0
@@ -41,54 +42,55 @@ class EvalResult:
         config = data.get("config")
 
         # Precision
-        precision = Precision.from_str(config.get("model_dtype"))
+        #precision = Precision.from_str(config.get("model_dtype"))
 
         # Get model and org
-        org_and_model = config.get("model_name", config.get("model_args", None))
+        org_and_model = config.get("model", config.get("model_args", None))
+        print("******* org_and_model **********", config)
         org_and_model = org_and_model.split("/", 1)
 
         if len(org_and_model) == 1:
             org = None
             model = org_and_model[0]
-            result_key = f"{model}_{precision.value.name}"
+            result_key = f"{model}"#_{precision.value.name}
         else:
             org = org_and_model[0]
             model = org_and_model[1]
-            result_key = f"{org}_{model}_{precision.value.name}"
+            result_key = f"{org}_{model}"#_{precision.value.name}
         full_model = "/".join(org_and_model)
 
         still_on_hub, _, model_config = is_model_on_hub(
             full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
         )
 
-
-        still_on_hub = True
-
+        """
         architecture = "?"
         if model_config is not None:
             architectures = getattr(model_config, "architectures", None)
             if architectures:
                 architecture = ";".join(architectures)
+        """
 
         # Extract results available in this file (some results are split in several files)
         results = {}
 
         results_obj = data.get("results")
         print(results_obj)
-        results["
-        results["speed"] = results_obj.get("speed")
-        results["
+        results["average_score"] = results_obj.get("average_score")
+        results["speed"] = results_obj.get("speed")
+        results["contamination_score"] = results_obj.get("contamination_score")
 
         return self(
             eval_name=result_key,
             full_model=full_model,
             org=org,
             model=model,
+            model_source=config.get("model_source", ""),
             results=results,
-            precision=precision,
+            #precision=precision,
             revision= config.get("model_sha", ""),
             still_on_hub=still_on_hub,
-            architecture=architecture
+            #architecture=architecture
         )
 
     def update_with_request_file(self, requests_path):
@@ -98,8 +100,8 @@ class EvalResult:
         with open(request_file, "r") as f:
             request = json.load(f)
 
-        self.model_type = ModelType.from_str(request.get("model_type", ""))
-        self.weight_type = WeightType[request.get("weight_type", "Original")]
+        #self.model_type = ModelType.from_str(request.get("model_type", ""))
+        #self.weight_type = WeightType[request.get("weight_type", "Original")]
         self.license = request.get("license", "?")
         self.likes = request.get("likes", 0)
         self.num_params = request.get("params", 0)
@@ -112,13 +114,13 @@ class EvalResult:
         average = self.results["average"]
         data_dict = {
             "eval_name": self.eval_name, # not a column, just a save name,
-            AutoEvalColumn.precision.name: self.precision.value.name,
-            AutoEvalColumn.model_type.name: self.model_type.value.name,
-            AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
-            AutoEvalColumn.weight_type.name: self.weight_type.value.name,
-            AutoEvalColumn.architecture.name: self.architecture,
+            #AutoEvalColumn.precision.name: self.precision.value.name,
+            AutoEvalColumn.model_source.name: self.model_source.value.name,
+            #AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
+            #AutoEvalColumn.weight_type.name: self.weight_type.value.name,
+            #AutoEvalColumn.architecture.name: self.architecture,
             AutoEvalColumn.model.name: make_clickable_model(self.full_model),
-            AutoEvalColumn.revision.name: self.revision,
+            #AutoEvalColumn.revision.name: self.revision,
             AutoEvalColumn.average.name: average,
             AutoEvalColumn.license.name: self.license,
             AutoEvalColumn.likes.name: self.likes,
@@ -149,7 +151,7 @@ def get_request_file_for_model(requests_path, model_name, precision):
             req_content = json.load(f)
             if (
                 req_content["status"] in ["FINISHED"]
-                and req_content["precision"] == precision.split(".")[-1]
+                #and req_content["precision"] == precision.split(".")[-1]
             ):
                 request_file = tmp_request_file
     return request_file

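For orientation, this is the result-file shape the reworked init_from_json_file expects. The key names (model, model_source, model_sha, average_score, speed, contamination_score) are taken from the diff above; the concrete values and the standalone script around them are illustrative only.

# shape_demo.py — sample payload matching the new parsing (assumed values)
data = {
    "config": {
        "model": "org/model",      # consulted first, then the "model_args" fallback
        "model_source": "HF",      # new field: HF, private, ...
        "model_sha": "main",
    },
    "results": {
        "average_score": 0.5,
        "speed": 12.3,
        "contamination_score": 0.0,
    },
}

config = data.get("config")
org_and_model = config.get("model", config.get("model_args", None)).split("/", 1)
results_obj = data.get("results")
results = {k: results_obj.get(k) for k in ("average_score", "speed", "contamination_score")}
print(org_and_model, results)  # ['org', 'model'] {'average_score': 0.5, 'speed': 12.3, 'contamination_score': 0.0}

Two loose ends worth flagging: to_dict still reads self.results["average"] while this file now stores average_score, and it calls self.model_source.value.name even though model_source is declared as a plain str; as written, both look likely to raise (KeyError / AttributeError) until reconciled.
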
src/submission/submit.py
CHANGED
@@ -172,5 +172,5 @@ def add_new_eval(
 
 
     return styled_message(
-        "Thank you for submitting your request! It has been placed in the evaluation queue."
+        "Thank you for submitting your request! It has been placed in the evaluation queue. You can expect the eval to be completed in 1 hour."
     )
|