Update submission

- app.py +1 -31
- src/submission/check_validity.py +4 -4
- src/submission/submit.py +35 -27
app.py CHANGED

@@ -148,43 +148,13 @@ with demo:
             with gr.Row():
                 with gr.Column():
                     model_name_textbox = gr.Textbox(label="Model name")
-
-                    model_type = gr.Dropdown(
-                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-                        label="Model type",
-                        multiselect=False,
-                        value=None,
-                        interactive=True,
-                    )
-
-                with gr.Column():
-                    precision = gr.Dropdown(
-                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
-                        label="Precision",
-                        multiselect=False,
-                        value="float16",
-                        interactive=True,
-                    )
-                    weight_type = gr.Dropdown(
-                        choices=[i.value.name for i in WeightType],
-                        label="Weights type",
-                        multiselect=False,
-                        value="Original",
-                        interactive=True,
-                    )
-                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-
+
             submit_button = gr.Button("Submit Eval")
             submission_result = gr.Markdown()
             submit_button.click(
                 add_new_eval,
                 [
                     model_name_textbox,
-                    base_model_name_textbox,
-                    revision_name_textbox,
-                    precision,
-                    weight_type,
-                    model_type,
                 ],
                 submission_result,
             )
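
Note on the app.py change: the submit tab now collects only the model name, and add_new_eval is wired to a single input. Below is a minimal standalone sketch of the resulting form, assuming a one-argument handler; the add_new_eval stub is a hypothetical placeholder for the real function in src/submission/submit.py.

    import gradio as gr

    # Hypothetical stub standing in for src/submission/submit.add_new_eval
    def add_new_eval(model: str) -> str:
        return f"Queued evaluation request for {model}"

    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column():
                model_name_textbox = gr.Textbox(label="Model name")

        submit_button = gr.Button("Submit Eval")
        submission_result = gr.Markdown()
        # Only the model name reaches the handler; model type, precision,
        # weight type and base model are no longer collected from the user.
        submit_button.click(add_new_eval, [model_name_textbox], submission_result)

    demo.launch()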
src/submission/check_validity.py CHANGED

@@ -31,7 +31,7 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:

     return True, ""

-def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
+def is_model_on_hub(model_name: str, revision: str = "main", token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
     """Checks if the model model_name is on the hub, and whether it (and its tokenizer) can be loaded with AutoClasses."""
     try:
         config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)

@@ -59,14 +59,14 @@ def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
     return False, "was not found on hub!", None


-def get_model_size(model_info: ModelInfo, precision: str):
+def get_model_size(model_info: ModelInfo): #, precision: str
     """Gets the model size from the configuration, or the model name if the configuration does not contain the information."""
     try:
         model_size = round(model_info.safetensors["total"] / 1e9, 3)
     except (AttributeError, TypeError):
         return 0  # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py
-
-    size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.modelId.lower()) else 1
+    print("******* model size **********",model_size)
+    size_factor = 1 #8 if (precision == "GPTQ" or "gptq" in model_info.modelId.lower()) else 1
     model_size = size_factor * model_size
     return model_size

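
Note on the check_validity.py change: is_model_on_hub can now be called without an explicit revision (it defaults to "main"), which the new call site in submit.py relies on. Its annotation still says tuple[bool, str], but the failure path returns three values, and the old call site in submit.py unpacked three (model_on_hub, error, _). A hedged usage sketch with a placeholder model id; note that truth-testing the returned tuple itself is always True, since a non-empty tuple is truthy:

    # "org/model" is a placeholder repo id, not a real one
    ok, message, _ = is_model_on_hub(model_name="org/model", token=None, test_tokenizer=True)
    if not ok:
        print(f"Rejected: {message}")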
src/submission/submit.py CHANGED

@@ -17,11 +17,11 @@ USERS_TO_SUBMISSION_DATES = None

 def add_new_eval(
     model: str,
-    base_model: str,
-    revision: str,
-    precision: str,
-    weight_type: str,
-    model_type: str,
+    #base_model: str,
+    #revision: str,
+    #precision: str,
+    #weight_type: str,
+    #model_type: str,
 ):
     global REQUESTED_MODELS
     global USERS_TO_SUBMISSION_DATES

@@ -34,9 +34,13 @@ def add_new_eval(
     user_name = model.split("/")[0]
     model_path = model.split("/")[1]

-    precision = precision.split(" ")[0]
+    #precision = precision.split(" ")[0]
     current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

+    if not is_model_on_hub(model_name=model, token=TOKEN, test_tokenizer=True): #revision=revision
+        return styled_error("Model does not exist on HF Hub. Please select a valid model name.")
+
+    """
     if model_type is None or model_type == "":
         return styled_error("Please select a model type.")

@@ -54,14 +58,14 @@ def add_new_eval(
     model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, token=TOKEN, test_tokenizer=True)
     if not model_on_hub:
         return styled_error(f'Model "{model}" {error}')
-
+    """
     # Is the model info correctly filled?
     try:
-        model_info = API.model_info(repo_id=model, revision=revision)
+        model_info = API.model_info(repo_id=model)#, revision=revision
     except Exception:
         return styled_error("Could not get your model information. Please fill it up properly.")

-    model_size = get_model_size(model_info=model_info, precision=precision)
+    model_size = get_model_size(model_info=model_info)#, precision=precision

     if model_size>30:
         return styled_error("Due to limited GPU availability, evaluations for models larger than 30B are currently not automated. Please open a ticket here so we do it manually for you. https://huggingface.co/spaces/silma-ai/Arabic-Broad-Leaderboard/discussions")

@@ -77,31 +81,32 @@ def add_new_eval(
         return styled_error(error_msg)

     # Seems good, creating the eval
-    print("Adding new eval")
+    print("Preparing a new eval")

     eval_entry = {
         "model": model,
-        "base_model": base_model,
-        "revision": revision,
-        "precision": precision,
-        "weight_type": weight_type,
+        "model_sha": model_info.sha,
+        #"base_model": base_model,
+        #"revision": revision,
+        #"precision": precision,
+        #"weight_type": weight_type,
         "status": "PENDING",
         "submitted_time": current_time,
-        "model_type": model_type,
+        #"model_type": model_type,
         "likes": model_info.likes,
         "params": model_size,
         "license": license,
-        "private": False,
+        #"private": False,
     }

     # Check for duplicate submission
-    if f"{model}_{revision}_{precision}" in REQUESTED_MODELS:
+    if f"{model}" in REQUESTED_MODELS: #_{revision}_{precision}
         return styled_warning("This model has been already submitted.")

     print("Creating eval file")
     OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
     os.makedirs(OUT_DIR, exist_ok=True)
-    out_path = f"{OUT_DIR}/{model_path}_eval_request_False_{precision}_{weight_type}.json"
+    out_path = f"{OUT_DIR}/{model_path}_eval_request.json" #_{precision}_{weight_type}

     with open(out_path, "w") as f:
         f.write(json.dumps(eval_entry))

@@ -109,8 +114,8 @@ def add_new_eval(

     ##update queue file
     queue_file_path = "./eval_queue.json"
-
-    ## download queue_file from repo using
+
+    ## download queue_file from repo using HuggingFace hub API, update it and upload again
     queue_file = hf_hub_download(
         filename=queue_file_path,
         repo_id=QUEUE_REPO,

@@ -121,18 +126,24 @@ def add_new_eval(

     with open(queue_file, "r") as f:
         queue_data = json.load(f)
+
     if len(queue_data) == 0:
         queue_data = []
+
     queue_data.append(eval_entry)
+
     print(queue_data)
+
     #with open(queue_file, "w") as f:
     #    json.dump(queue_data, f)
-
+
+    print("Updating eval queue file")
     API.upload_file(
         path_or_fileobj=json.dumps(queue_data, indent=2).encode("utf-8"),
         path_in_repo=queue_file_path,
         repo_id=QUEUE_REPO,
-        repo_type="space"
+        repo_type="space",
+        commit_message=f"Add {model} to eval queue"
     )

@@ -142,17 +153,14 @@ def add_new_eval(
         path_in_repo=out_path,
         repo_id=QUEUE_REPO,
         repo_type="space",
-        commit_message=f"Add {model} to eval queue",
+        commit_message=f"Add {model} request file",
     )


-
-
-
-
     # Remove the local file
     os.remove(out_path)

+
     return styled_message(
         "Your request has been submitted to the evaluation queue!\nPlease wait for up to an 15 minutes for the model to show in the PENDING list."
     )
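
Note on the submit.py queue handling: the commit adds a download-modify-upload round trip against the Space repo for eval_queue.json. Below is a self-contained sketch of the pattern under assumed values: QUEUE_REPO is a placeholder Space id, while hf_hub_download and HfApi.upload_file are the actual huggingface_hub calls used in the diff above.

    import json
    from huggingface_hub import HfApi, hf_hub_download

    QUEUE_REPO = "org/leaderboard-space"  # placeholder Space id
    api = HfApi()

    def append_to_queue(eval_entry: dict, queue_filename: str = "eval_queue.json") -> None:
        # Fetch the current queue file from the Space repo
        local_path = hf_hub_download(repo_id=QUEUE_REPO, filename=queue_filename, repo_type="space")
        with open(local_path, "r") as f:
            queue_data = json.load(f)
        queue_data.append(eval_entry)
        # Push the whole updated list back in a single commit
        api.upload_file(
            path_or_fileobj=json.dumps(queue_data, indent=2).encode("utf-8"),
            path_in_repo=queue_filename,
            repo_id=QUEUE_REPO,
            repo_type="space",
            commit_message=f"Add {eval_entry['model']} to eval queue",
        )

Because the whole queue file is rewritten on every submission, two concurrent submissions can race and the last upload wins; the per-model request file the code also commits does not have this problem, since each submission writes its own file.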