Spaces:
Runtime error
Runtime error
Add connection to AutoTrain
Browse files- app.py +36 -20
- requirements.txt +2 -1
- utils.py +37 -0
app.py
CHANGED
|
@@ -1,41 +1,57 @@
|
|
| 1 |
-
import
|
| 2 |
-
from
|
| 3 |
-
|
| 4 |
-
api = HfApi()
|
| 5 |
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
|
| 8 |
-
filt = DatasetFilter(dataset_name=dataset_name)
|
| 9 |
-
data = api.list_datasets(filter=filt, full=True)
|
| 10 |
-
return data[0].cardData["train-eval-index"]
|
| 11 |
|
|
|
|
|
|
|
| 12 |
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
return [model.modelId for model in compatible_models]
|
| 17 |
|
| 18 |
|
| 19 |
with st.form(key="form"):
|
| 20 |
|
| 21 |
-
dataset_name = st.selectbox("Select a dataset to evaluate on", ["lewtun/
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
metadata = get_metadata(dataset_name)
|
| 24 |
-
# st.write(metadata)
|
| 25 |
|
| 26 |
dataset_config = st.selectbox("Select the subset to evaluate on", [metadata[0]["config"]])
|
| 27 |
|
| 28 |
splits = metadata[0]["splits"]
|
|
|
|
|
|
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
evaluation_split = st.selectbox("Select the split to evaluate on", [v for d in splits for k, v in d.items()])
|
| 33 |
|
| 34 |
-
compatible_models = get_compatible_models(metadata[0]["task"],
|
| 35 |
|
| 36 |
-
|
| 37 |
|
| 38 |
submit_button = st.form_submit_button("Make Submission")
|
| 39 |
|
| 40 |
if submit_button:
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from pathlib import Path
|
|
|
|
|
|
|
| 3 |
|
| 4 |
+
import streamlit as st
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
|
| 7 |
+
from utils import get_compatible_models, get_metadata, http_post
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
+
# Load environment variables from a local .env file when present (local dev);
# NOTE(review): on the deployed Space these presumably arrive as Space secrets — confirm.
if Path(".env").is_file():
    load_dotenv(".env")

# Credentials and endpoint for talking to the AutoTrain backend.
HF_TOKEN = os.getenv("HF_TOKEN")
AUTOTRAIN_USERNAME = os.getenv("AUTOTRAIN_USERNAME")
AUTOTRAIN_BACKEND_API = os.getenv("AUTOTRAIN_BACKEND_API")
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
with st.form(key="form"):
    dataset_name = st.selectbox("Select a dataset to evaluate on", ["lewtun/autoevaluate__emotion"])

    # TODO: remove this step once we select real datasets
    # Strip out original dataset name, e.g. "lewtun/autoevaluate__emotion" -> "emotion"
    original_dataset_name = dataset_name.split("/")[-1].split("__")[-1]

    # In general this will be a list of multiple configs => need to generalise logic here
    metadata = get_metadata(dataset_name)

    dataset_config = st.selectbox("Select the subset to evaluate on", [metadata[0]["config"]])

    splits = metadata[0]["splits"]
    split_names = list(splits.values())
    # Prefer the dedicated eval split when the card declares one; otherwise first split.
    eval_split = splits.get("eval_split", split_names[0])

    selected_split = st.selectbox("Select the split to evaluate on", split_names, index=split_names.index(eval_split))

    compatible_models = get_compatible_models(metadata[0]["task"], original_dataset_name)

    # Default to the first compatible model. Use a slice (not [0]) so an empty
    # model list yields an empty default instead of raising IndexError.
    selected_models = st.multiselect(
        "Select the models you wish to evaluate", compatible_models, compatible_models[:1]
    )

    submit_button = st.form_submit_button("Make Submission")

if submit_button:
    # One evaluation job is created per selected model.
    for model in selected_models:
        payload = {
            "username": AUTOTRAIN_USERNAME,
            # NOTE(review): hard-coded task ID 1 — presumably text classification; confirm against backend enum.
            "task": 1,
            "model": model,
            "col_mapping": {"sentence": "text", "label": "target"},
            "split": selected_split,
            "dataset": original_dataset_name,
            "config": dataset_config,
        }
        json_resp = http_post(
            path="/evaluate/create", payload=payload, token=HF_TOKEN, domain=AUTOTRAIN_BACKEND_API
        ).json()

        st.success(f"✅ Successfully submitted model {model} for evaluation with job ID {json_resp['id']}")
|
requirements.txt
CHANGED
|
@@ -1 +1,2 @@
|
|
| 1 |
-
huggingface-hub==0.4.0
|
|
|
|
|
|
| 1 |
+
huggingface-hub==0.4.0
|
| 2 |
+
python-dotenv
|
utils.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
from huggingface_hub import DatasetFilter, HfApi, ModelFilter
|
| 3 |
+
|
| 4 |
+
api = HfApi()
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def get_auth_headers(token: str, prefix: str = "autonlp"):
    """Build the Authorization header dict expected by the AutoNLP/AutoTrain API."""
    return {"Authorization": " ".join((prefix, token))}
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def http_post(
    path: str,
    token: str,
    payload=None,
    domain: str = None,
) -> requests.Response:
    """HTTP POST to the AutoNLP API.

    Args:
        path: API path appended to `domain` (e.g. "/evaluate/create").
        token: auth token passed via `get_auth_headers`.
        payload: JSON-serialisable request body.
        domain: base URL of the API.

    Raises:
        requests.exceptions.ConnectionError: if the API cannot be reached.
        requests.exceptions.HTTPError: on non-2xx responses (via raise_for_status).
    """
    try:
        response = requests.post(
            url=domain + path, json=payload, headers=get_auth_headers(token=token), allow_redirects=True
        )
    except requests.exceptions.ConnectionError:
        print("❌ Failed to reach AutoNLP API, check your internet connection")
        # Bug fix: previously execution fell through to `response.raise_for_status()`
        # with `response` unbound, masking the real error with an UnboundLocalError.
        raise
    response.raise_for_status()
    return response
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def get_metadata(dataset_name):
    """Return the `train-eval-index` card metadata of the first Hub dataset matching `dataset_name`."""
    matches = api.list_datasets(filter=DatasetFilter(dataset_name=dataset_name), full=True)
    return matches[0].cardData["train-eval-index"]
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def get_compatible_models(task, dataset_name):
    """List Hub model IDs tagged as trained on `dataset_name` for the given `task`."""
    model_filter = ModelFilter(task=task, trained_dataset=dataset_name)
    return [m.modelId for m in api.list_models(filter=model_filter)]
|