Add eval caching
- app.py +20 -0
- evaluation.py +26 -0
- utils.py +1 -1
app.py
CHANGED
@@ -8,6 +8,8 @@ from datasets import get_dataset_config_names
 from dotenv import load_dotenv
 from huggingface_hub import list_datasets
 
+from evaluation import (EvaluationInfo, compute_evaluation_id,
+                        get_evaluation_ids)
 from utils import (get_compatible_models, get_key, get_metadata, http_get,
                    http_post)
 
@@ -244,6 +246,24 @@ with st.form(key="form"):
 
     selected_models = st.multiselect("Select the models you wish to evaluate", compatible_models)
     print("Selected models:", selected_models)
+
+    evaluation_ids = get_evaluation_ids()
+
+    for idx, model in enumerate(selected_models):
+        eval_info = EvaluationInfo(
+            task=selected_task,
+            model=model,
+            dataset_name=selected_dataset,
+            dataset_config=selected_config,
+            dataset_split=selected_split,
+        )
+        candidate_id = hash(eval_info)
+        if candidate_id in evaluation_ids:
+            st.info(f"Model {model} has already been evaluated on this configuration. Skipping ...")
+            selected_models.pop(idx)
+
+    print("Selected models:", selected_models)
+
     submit_button = st.form_submit_button("Make submission")
 
 if submit_button:
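A note on the added loop: `selected_models.pop(idx)` mutates the list while `enumerate(selected_models)` is still iterating over it, so the model immediately after a skipped one is never checked. Below is a minimal, mutation-free sketch of the same dedup logic; it is not part of the commit, and it assumes the same surrounding Streamlit form variables (`selected_task`, `selected_dataset`, `selected_config`, `selected_split`) that the diff uses.

import streamlit as st

from evaluation import EvaluationInfo, get_evaluation_ids

# Sketch only: build a new list instead of popping from the one being iterated.
evaluation_ids = set(get_evaluation_ids())  # set membership check instead of a list scan

models_to_evaluate = []
for model in selected_models:
    eval_info = EvaluationInfo(
        task=selected_task,
        model=model,
        dataset_name=selected_dataset,
        dataset_config=selected_config,
        dataset_split=selected_split,
    )
    if hash(eval_info) in evaluation_ids:
        st.info(f"Model {model} has already been evaluated on this configuration. Skipping ...")
    else:
        models_to_evaluate.append(model)

selected_models = models_to_evaluate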
evaluation.py
ADDED
@@ -0,0 +1,26 @@
+from dataclasses import dataclass
+
+from huggingface_hub import DatasetFilter, HfApi
+from huggingface_hub.hf_api import DatasetInfo
+
+
+@dataclass(frozen=True, eq=True)
+class EvaluationInfo:
+    task: str
+    model: str
+    dataset_name: str
+    dataset_config: str
+    dataset_split: str
+
+
+def compute_evaluation_id(dataset_info: DatasetInfo) -> int:
+    metadata = dataset_info.cardData["eval_info"]
+    metadata.pop("col_mapping", None)
+    evaluation_info = EvaluationInfo(**metadata)
+    return hash(evaluation_info)
+
+
+def get_evaluation_ids():
+    filt = DatasetFilter(author="autoevaluate")
+    evaluation_datasets = HfApi().list_datasets(filter=filt, full=True)
+    return [compute_evaluation_id(dset) for dset in evaluation_datasets]
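For reference, `@dataclass(frozen=True, eq=True)` generates `__eq__` and `__hash__` from the field values, which is what lets `hash(eval_info)` in app.py be compared against the IDs computed from Hub metadata. String hashing is salted per process (`PYTHONHASHSEED`), so these IDs are only stable within a single running app; that is enough here because both sides of the comparison are computed in the same process. A tiny illustration with placeholder model and dataset names:

from dataclasses import dataclass


@dataclass(frozen=True, eq=True)
class EvaluationInfo:
    task: str
    model: str
    dataset_name: str
    dataset_config: str
    dataset_split: str


# Placeholder values for illustration only.
a = EvaluationInfo("binary_classification", "some-org/some-model", "imdb", "plain_text", "test")
b = EvaluationInfo("binary_classification", "some-org/some-model", "imdb", "plain_text", "test")
assert a == b and hash(a) == hash(b)  # equal field values give equal hashes within one process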
utils.py
CHANGED
@@ -1,7 +1,7 @@
 from typing import Dict, Union
 
 import requests
-from huggingface_hub import
+from huggingface_hub import HfApi, ModelFilter
 
 AUTOTRAIN_TASK_TO_HUB_TASK = {
     "binary_classification": "text-classification",