Add persistent storage for logging

Changed files:
- .gitignore +4 -1
- app.py +23 -15
- requirements.txt +1 -0
- utils.py +29 -1

.gitignore CHANGED

@@ -128,4 +128,7 @@ dmypy.json
 # Pyre type checker
 .pyre/

-scratch/
+scratch/
+
+# Evaluation job logs
+evaluation-job-logs/
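The new ignore entry covers the local working copy that commit_evaluation_log (added in utils.py below) clones into the Space's working directory. A minimal sketch of what creates that folder, simplified from the diff below (the dataset URL matches utils.py; the token handling here is an assumption):

from huggingface_hub import Repository

# Cloning the logs dataset creates ./evaluation-job-logs/ next to app.py,
# which should not be committed back to the Space repo -- hence the new
# .gitignore entry.
logs_repo = Repository(
    local_dir="evaluation-job-logs",
    clone_from="https://huggingface.co/datasets/autoevaluate/evaluation-job-logs",
    repo_type="dataset",
    use_auth_token=True,  # assumption: a cached token with access to the private dataset
)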

app.py CHANGED

@@ -13,6 +13,7 @@ from tqdm import tqdm
 
 from evaluation import filter_evaluated_models
 from utils import (
+    commit_evaluation_log,
     format_col_mapping,
     get_compatible_models,
     get_key,
@@ -69,7 +70,7 @@ def get_supported_metrics():
             metric_func = load(metric)
         except Exception as e:
             print(e)
-            print("Skipping the following metric, which cannot load:", metric)
+            print("WARNING -- Skipping the following metric, which cannot load:", metric)
             continue
 
         argspec = inspect.getfullargspec(metric_func.compute)
@@ -128,7 +129,7 @@ st.experimental_set_query_params(**{"dataset": [selected_dataset]})
 
 
 metadata = get_metadata(selected_dataset)
-print(metadata)
+print(f"INFO -- Dataset metadata: {metadata}")
 if metadata is None:
     st.warning("No evaluation metadata found. Please configure the evaluation job below.")
 
@@ -352,7 +353,7 @@ with st.form(key="form"):
         help="""Don't see your model in this list? Add the dataset and task it was trained to the \
         [model card metadata.](https://huggingface.co/docs/hub/models-cards#model-card-metadata)""",
     )
-    print("Selected models:", selected_models)
+    print("INFO -- Selected models before filter:", selected_models)
 
     if len(selected_models) > 0:
         selected_models = filter_evaluated_models(
@@ -362,14 +363,14 @@ with st.form(key="form"):
             selected_config,
             selected_split,
         )
-        print("Selected models:", selected_models)
+        print("INFO -- Selected models after filter:", selected_models)
 
-    submit_button = st.form_submit_button("Evaluate models")
+    submit_button = st.form_submit_button("Evaluate models 🚀")
 
     if submit_button:
         if len(selected_models) > 0:
             project_id = str(uuid.uuid4())[:8]
-
+            project_payload = {
                 "username": AUTOTRAIN_USERNAME,
                 "proj_name": f"eval-project-{project_id}",
                 "task": TASK_TO_ID[selected_task],
@@ -391,24 +392,24 @@ with st.form(key="form"):
                     },
                 },
             }
-            print(f"Payload: {
+            print(f"INFO -- Payload: {project_payload}")
             project_json_resp = http_post(
                 path="/projects/create",
-                payload=
+                payload=project_payload,
                 token=HF_TOKEN,
                 domain=AUTOTRAIN_BACKEND_API,
             ).json()
-            print(project_json_resp)
+            print(f"INFO -- Project creation response: {project_json_resp}")
 
             if project_json_resp["created"]:
-
+                data_payload = {
                     "split": 4,  # use "auto" split choice in AutoTrain
                     "col_mapping": col_mapping,
                     "load_config": {"max_size_bytes": 0, "shuffle": False},
                 }
                 data_json_resp = http_post(
                     path=f"/projects/{project_json_resp['id']}/data/{selected_dataset}",
-                    payload=
+                    payload=data_payload,
                     token=HF_TOKEN,
                     domain=AUTOTRAIN_BACKEND_API,
                     params={
@@ -417,24 +418,31 @@ with st.form(key="form"):
                         "split_name": selected_split,
                     },
                 ).json()
-                print(data_json_resp)
+                print(f"INFO -- Dataset creation response: {data_json_resp}")
                 if data_json_resp["download_status"] == 1:
                     train_json_resp = http_get(
                         path=f"/projects/{project_json_resp['id']}/data/start_process",
                         token=HF_TOKEN,
                         domain=AUTOTRAIN_BACKEND_API,
                     ).json()
-                    print(train_json_resp)
+                    print(f"INFO -- AutoTrain job response: {train_json_resp}")
                     if train_json_resp["success"]:
-                        st.success(f"✅ Successfully submitted evaluation job with project
+                        st.success(f"✅ Successfully submitted evaluation job with project name {project_id}")
                         st.markdown(
                             f"""
-                            Evaluation
+                            Evaluation can take up to 1 hour to complete, so grab a ☕ or 🍵 while you wait:
 
                             📊 Click [here](https://hf.co/spaces/autoevaluate/leaderboards?dataset={selected_dataset}) \
                                 to view the results from your submission
                             """
                         )
+                        print("INFO -- Pushing evaluation job logs to the Hub")
+                        evaluation_log = {}
+                        evaluation_log["payload"] = project_payload
+                        evaluation_log["project_creation_response"] = project_json_resp
+                        evaluation_log["dataset_creation_response"] = data_json_resp
+                        evaluation_log["autotrain_job_response"] = train_json_resp
+                        commit_evaluation_log(evaluation_log, hf_access_token=HF_TOKEN)
                     else:
                         st.error("🙈 Oh no, there was an error submitting your evaluation job!")
                 else:
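For reference, each record that app.py now hands to commit_evaluation_log is a plain dictionary with the four top-level keys added above. A minimal sketch of one record (only the top-level keys and the response fields that app.py checks come from the diff; every value shown here is hypothetical):

evaluation_log = {
    "payload": {                                  # the project_payload sent to /projects/create
        "username": "autoevaluate",               # hypothetical value
        "proj_name": "eval-project-1a2b3c4d",     # hypothetical 8-character project id
    },
    "project_creation_response": {"created": True, "id": 123},  # hypothetical response
    "dataset_creation_response": {"download_status": 1},        # hypothetical response
    "autotrain_job_response": {"success": True},                # hypothetical response
}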

requirements.txt CHANGED

@@ -3,6 +3,7 @@ python-dotenv
 streamlit==1.10.0
 datasets<2.3
 evaluate<0.2
+jsonlines
 # Dataset specific deps
 py7zr<0.19
 openpyxl<3.1
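The new jsonlines dependency supports the read-append-rewrite pattern used by commit_evaluation_log in utils.py. A minimal sketch of that pattern, assuming a local logs.jsonl file already exists (the appended record is hypothetical):

import jsonlines

# Read every existing record into memory.
with jsonlines.open("logs.jsonl") as reader:
    records = [obj for obj in reader]

# Append the new record and rewrite the whole file.
records.append({"payload": {"proj_name": "eval-project-1a2b3c4d"}})
with jsonlines.open("logs.jsonl", mode="w") as writer:
    for record in records:
        writer.write(record)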

utils.py CHANGED

@@ -1,7 +1,8 @@
 from typing import Dict, Union
 
+import jsonlines
 import requests
-from huggingface_hub import HfApi, ModelFilter, dataset_info
+from huggingface_hub import HfApi, ModelFilter, Repository, dataset_info
 
 AUTOTRAIN_TASK_TO_HUB_TASK = {
     "binary_classification": "text-classification",
@@ -15,6 +16,8 @@ AUTOTRAIN_TASK_TO_HUB_TASK = {
 }
 
 HUB_TASK_TO_AUTOTRAIN_TASK = {v: k for k, v in AUTOTRAIN_TASK_TO_HUB_TASK.items()}
+LOGS_REPO = "evaluation-job-logs"
+
 
 api = HfApi()
 
@@ -86,3 +89,28 @@ def format_col_mapping(col_mapping: dict) -> dict:
         col_mapping[f"answers.{k}"] = f"answers.{v}"
     del col_mapping["answers"]
     return col_mapping
+
+
+def commit_evaluation_log(evaluation_log, hf_access_token=None):
+    logs_repo_url = f"https://huggingface.co/datasets/autoevaluate/{LOGS_REPO}"
+    logs_repo = Repository(
+        local_dir=LOGS_REPO,
+        clone_from=logs_repo_url,
+        repo_type="dataset",
+        private=True,
+        use_auth_token=hf_access_token,
+    )
+    logs_repo.git_pull()
+    with jsonlines.open(f"{LOGS_REPO}/logs.jsonl") as r:
+        lines = []
+        for obj in r:
+            lines.append(obj)
+
+    lines.append(evaluation_log)
+    with jsonlines.open(f"{LOGS_REPO}/logs.jsonl", mode="w") as writer:
+        for job in lines:
+            writer.write(job)
+    logs_repo.push_to_hub(
+        commit_message=f"Evaluation submitted with project name {evaluation_log['payload']['proj_name']}"
+    )
+    print("INFO -- Pushed evaluation logs to the Hub")
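A minimal usage sketch of the new helper, assuming HF_TOKEN grants write access to the private autoevaluate/evaluation-job-logs dataset. The log contents below are hypothetical; only the payload/proj_name structure is required, because the commit message reads evaluation_log['payload']['proj_name']:

import os

from utils import commit_evaluation_log

evaluation_log = {
    "payload": {"proj_name": "eval-project-1a2b3c4d"},     # hypothetical project name
    "project_creation_response": {"created": True},        # hypothetical response
    "dataset_creation_response": {"download_status": 1},   # hypothetical response
    "autotrain_job_response": {"success": True},           # hypothetical response
}

# Clones/pulls the logs dataset, appends the record to logs.jsonl, and pushes it back.
commit_evaluation_log(evaluation_log, hf_access_token=os.environ["HF_TOKEN"])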