reject duplicate submission
app.py CHANGED
@@ -15,7 +15,21 @@ H4_TOKEN = os.environ.get("H4_TOKEN", None)
 LMEH_REPO = "HuggingFaceH4/lmeh_evaluations"
 IS_PUBLIC = bool(os.environ.get("IS_PUBLIC", None))
 
+
+def get_all_requested_models(requested_models_dir):
+    depth = 1
+    file_names = []
+
+    for root, dirs, files in os.walk(requested_models_dir):
+        current_depth = root.count(os.sep) - requested_models_dir.count(os.sep)
+        if current_depth == depth:
+            file_names.extend([os.path.join(root, file) for file in files])
+
+    return set([file_name.lower().split("./evals/")[1] for file_name in file_names])
+
+
 repo = None
+requested_models = None
 if H4_TOKEN:
     print("pulling repo")
     # try:
@@ -31,6 +45,9 @@ if H4_TOKEN:
     )
     repo.git_pull()
 
+    requested_models_dir = "./evals/eval_requests"
+    requested_models = get_all_requested_models(requested_models_dir)
+
 
 # parse the results
 BENCHMARKS = ["arc_challenge", "hellaswag", "hendrycks", "truthfulqa_mc"]
@@ -110,7 +127,7 @@ def get_leaderboard():
 
     dataframe = pd.DataFrame.from_records(all_data)
     dataframe = dataframe.sort_values(by=["Average ⬆️"], ascending=False)
-    print(dataframe)
+    # print(dataframe)
     dataframe = dataframe[COLS]
     return dataframe
 
@@ -187,12 +204,12 @@ def add_new_eval(
     if is_delta_weight and not is_model_on_hub(base_model, revision):
         error_message = f'Base model "{base_model}" was not found on hub!'
         print(error_message)
-        return f"<p style='color: red; font-size:
+        return f"<p style='color: red; font-size: 20px; text-align: center;'>{error_message}</p>"
 
     if not is_model_on_hub(model, revision):
         error_message = f'Model "{model}"was not found on hub!'
         print(error_message)
-        return f"<p style='color: red; font-size:
+        return f"<p style='color: red; font-size: 20px; text-align: center;'>{error_message}</p>"
 
     print("adding new eval")
 
@@ -216,6 +233,11 @@ def add_new_eval(
     os.makedirs(OUT_DIR, exist_ok=True)
     out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{is_8_bit_eval}_{is_delta_weight}.json"
 
+    # Check for duplicate submission
+    if out_path.lower() in requested_models:
+        duplicate_request_message = "This model has been already submitted."
+        return f"<p style='color: orange; font-size: 20px; text-align: center;'>{duplicate_request_message}</p>"
+
     with open(out_path, "w") as f:
         f.write(json.dumps(eval_entry))
     LMEH_REPO = "HuggingFaceH4/lmeh_evaluations"
@@ -230,7 +252,7 @@ def add_new_eval(
     )
 
     success_message = "Your request has been submitted to the evaluation queue!"
-    return f"<p style='color: green; font-size:
+    return f"<p style='color: green; font-size: 20px; text-align: center;'>{success_message}</p>"
 
 
 def refresh():
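
In short, the app now builds a set of every request file already present in the cloned eval_requests folder and rejects a new submission whose out_path is already in that set. The snippet below is a minimal, self-contained sketch of that flow rather than the Space's actual code: the directory layout under ./evals/eval_requests, the org and model names, and the assumption that out_path is written relative to the working directory (not the ./evals/ checkout) are all illustrative.

import json
import os
import tempfile


def get_all_requested_models(requested_models_dir):
    # Collect request files exactly one directory below requested_models_dir
    # (i.e. <requested_models_dir>/<org>/<request>.json) and normalise them to
    # the lower-cased path relative to the ./evals/ checkout.
    # Mirrors the app's use of os.sep, so this sketch assumes POSIX-style paths.
    depth = 1
    file_names = []
    for root, dirs, files in os.walk(requested_models_dir):
        current_depth = root.count(os.sep) - requested_models_dir.count(os.sep)
        if current_depth == depth:
            file_names.extend(os.path.join(root, f) for f in files)
    return {name.lower().split("./evals/")[1] for name in file_names}


previous_cwd = os.getcwd()
with tempfile.TemporaryDirectory() as workdir:
    os.chdir(workdir)

    # Pretend one request already sits in the cloned queue under ./evals/
    # (hypothetical org and model names).
    existing = "./evals/eval_requests/some-org/some-model_eval_request_False_False_False.json"
    os.makedirs(os.path.dirname(existing), exist_ok=True)
    with open(existing, "w") as f:
        json.dump({"model": "some-org/some-model"}, f)

    requested_models = get_all_requested_models("./evals/eval_requests")

    # A second submission of the same model maps to the same out_path, so the
    # membership test rejects it before anything is written or uploaded.
    out_path = "eval_requests/some-org/some-model_eval_request_False_False_False.json"
    if out_path.lower() in requested_models:
        print("This model has been already submitted.")  # duplicate -> rejected
    else:
        print("New submission accepted.")

    os.chdir(previous_cwd)
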
utils.py CHANGED
@@ -133,4 +133,4 @@ def get_eval_results_dicts(is_public=True) -> List[Dict]:
     return [e.to_dict() for e in eval_results]
 
 eval_results_dict = get_eval_results_dicts()
-print(eval_results_dict)
+# print(eval_results_dict)