Spaces:
Runtime error
Runtime error
edbeeching
committed on
Commit
·
59c748f
1
Parent(s):
f90ad24
finished this part of leaderboard refactor
Browse files
utils.py
CHANGED
|
@@ -44,6 +44,7 @@ def make_clickable_model(model_name):
|
|
| 44 |
|
| 45 |
@dataclass
|
| 46 |
class EvalResult:
|
|
|
|
| 47 |
org : str
|
| 48 |
model : str
|
| 49 |
is_8bit : bool
|
|
@@ -51,8 +52,9 @@ class EvalResult:
|
|
| 51 |
|
| 52 |
def to_dict(self):
|
| 53 |
data_dict = {}
|
|
|
|
| 54 |
data_dict["base_model"] = make_clickable_model(f"{self.org}/{self.model}")
|
| 55 |
-
data_dict["total ⬆️"] = sum([v for k,v in self.results.items()])
|
| 56 |
data_dict["# params"] = "unknown (todo)"
|
| 57 |
|
| 58 |
for benchmark in BENCHMARKS:
|
|
@@ -86,8 +88,8 @@ def parse_eval_result(json_filepath: str) -> Tuple[str, dict]:
|
|
| 86 |
for benchmark, metric in zip(BENCHMARKS, METRICS):
|
| 87 |
if benchmark in json_filepath:
|
| 88 |
accs = np.array([v[metric] for k, v in data["results"].items()])
|
| 89 |
-
mean_acc = np.mean(accs)
|
| 90 |
-
eval_result = EvalResult(org, model, is_8bit, {benchmark:mean_acc})
|
| 91 |
|
| 92 |
return result_key, eval_result
|
| 93 |
|
|
|
|
| 44 |
|
| 45 |
@dataclass
|
| 46 |
class EvalResult:
|
| 47 |
+
eval_name : str
|
| 48 |
org : str
|
| 49 |
model : str
|
| 50 |
is_8bit : bool
|
|
|
|
| 52 |
|
| 53 |
def to_dict(self):
|
| 54 |
data_dict = {}
|
| 55 |
+
data_dict["eval_name"] = self.eval_name
|
| 56 |
data_dict["base_model"] = make_clickable_model(f"{self.org}/{self.model}")
|
| 57 |
+
data_dict["total ⬆️"] = round(sum([v for k,v in self.results.items()]),3)
|
| 58 |
data_dict["# params"] = "unknown (todo)"
|
| 59 |
|
| 60 |
for benchmark in BENCHMARKS:
|
|
|
|
| 88 |
for benchmark, metric in zip(BENCHMARKS, METRICS):
|
| 89 |
if benchmark in json_filepath:
|
| 90 |
accs = np.array([v[metric] for k, v in data["results"].items()])
|
| 91 |
+
mean_acc = round(np.mean(accs),3)
|
| 92 |
+
eval_result = EvalResult(result_key, org, model, is_8bit, {benchmark:mean_acc})
|
| 93 |
|
| 94 |
return result_key, eval_result
|
| 95 |
|