test

- src/about.py (+2 -2)
- src/leaderboard/read_evals.py (+13 -5)
src/about.py
@@ -12,8 +12,8 @@ class Task:
 # ---------------------------------------------------
 class Tasks(Enum):
     # task_key in the json file, metric_key in the json file, name to display in the leaderboard
-    task0 = Task("
-    task1 = Task("
+    task0 = Task("speed", "spq", "Speed")
+    task1 = Task("contamination", "score", "Contamination")
 
 NUM_FEWSHOT = 0  # Change with your few shot
 # ---------------------------------------------------
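The new entries plug project-specific metrics into the template's Tasks enum. A minimal sketch of how the pieces fit together, assuming the Task container has the three fields described by the comment above; the field names (benchmark, metric, col_name) are taken from the standard leaderboard template, not from this diff:

from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    benchmark: str  # task_key in the results json file
    metric: str     # metric_key in the results json file
    col_name: str   # name to display in the leaderboard

class Tasks(Enum):
    task0 = Task("speed", "spq", "Speed")
    task1 = Task("contamination", "score", "Contamination")

Each enum member drives one leaderboard column: the score is read from the results json under benchmark/metric and displayed under col_name.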
src/leaderboard/read_evals.py
@@ -60,6 +60,10 @@ class EvalResult:
         still_on_hub, _, model_config = is_model_on_hub(
             full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
         )
+
+        # make still_on_hub always true for now:
+        still_on_hub = True
+
         architecture = "?"
         if model_config is not None:
             architectures = getattr(model_config, "architectures", None)
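The added override makes every model count as still on the Hub: the verdict of is_model_on_hub is discarded, while model_config is still used for the architecture lookup below. If the check should be switchable rather than permanently dead, a sketch using a hypothetical environment flag (SKIP_HUB_CHECK is not part of this codebase):

import os

still_on_hub, _, model_config = is_model_on_hub(
    full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
)
# Hypothetical escape hatch: only bypass the check when explicitly requested.
if os.environ.get("SKIP_HUB_CHECK", "0") == "1":
    still_on_hub = True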
@@ -90,11 +94,10 @@ class EvalResult:
     def update_with_request_file(self, requests_path):
         """Finds the relevant request file for the current model and updates info with it"""
         request_file = get_request_file_for_model(requests_path, self.full_model, self.precision.value.name)
-
         try:
             with open(request_file, "r") as f:
                 request = json.load(f)
-
+
             self.model_type = ModelType.from_str(request.get("model_type", ""))
             self.weight_type = WeightType[request.get("weight_type", "Original")]
             self.license = request.get("license", "?")
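update_with_request_file only reads the three keys shown above, each with a default when missing. An illustrative minimal request file this parser would accept; the values are assumptions for the sketch (weight_type must name a WeightType member, and model_type must be parseable by ModelType.from_str):

{
    "model_type": "pretrained",
    "weight_type": "Original",
    "license": "apache-2.0"
}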
@@ -106,7 +109,7 @@ class EvalResult:
 
     def to_dict(self):
         """Converts the Eval Result to a dict compatible with our dataframe display"""
-        average =
+        average = self.results["average"]
         data_dict = {
             "eval_name": self.eval_name,  # not a column, just a save name,
             AutoEvalColumn.precision.name: self.precision.value.name,
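The new line assumes self.results always carries an "average" key; when it does not, to_dict raises KeyError, which is exactly what the except KeyError handler in get_raw_eval_results (below) relies on to drop incomplete evals. If incomplete rows should be kept rather than filtered out, a tolerant sketch:

# Sketch: substitute a placeholder instead of raising on missing data.
average = self.results.get("average")
if average is None:
    average = "?"  # row is kept and shown without an average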
@@ -135,6 +138,7 @@ def get_request_file_for_model(requests_path, model_name, precision):
         requests_path,
         f"{model_name}_eval_request_*.json",
     )
+
     request_files = glob.glob(request_files)
 
     # Select correct request file (precision)
@@ -174,7 +178,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
         # Creation of result
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
         eval_result.update_with_request_file(requests_path)
-
+
         # Store results of same eval together
         eval_name = eval_result.eval_name
         if eval_name in eval_results.keys():
@@ -183,13 +187,17 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
             eval_results[eval_name] = eval_result
 
     results = []
-
+    #print(eval_results.values())
     for v in eval_results.values():
         try:
             print(v.to_dict())
             v.to_dict()  # we test if the dict version is complete
             results.append(v)
         except KeyError:  # not all eval values present
+            print("Key error in eval result, skipping")
+
+            print(v)
+            print(v.to_dict())
             continue
 
     print(results)
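One caveat in the new except branch: print(v.to_dict()) calls the very method whose KeyError was just caught, so it raises again inside the handler, the continue is never reached, and the loop aborts on the first incomplete result. A sketch of a logging variant that cannot re-raise, using eval_name, which is set before to_dict is ever called:

for v in eval_results.values():
    try:
        v.to_dict()  # we test if the dict version is complete
        results.append(v)
    except KeyError as e:  # not all eval values present
        # Log the missing key and the eval name; do not call to_dict() again.
        print(f"Key error {e} in eval result {v.eval_name}, skipping")
        continue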