Update submission functionality
Browse files

- src/display/utils.py +11 -0
- src/envs.py +3 -2
- src/submission/check_validity.py +21 -32
- src/submission/submit.py +43 -46
src/display/utils.py
CHANGED
|
@@ -81,6 +81,17 @@ class EvalQueueColumn: # Queue column
|
|
| 81 |
status = ColumnContent("status", "str", True)
|
| 82 |
|
| 83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
## All the model information that we might need
|
| 85 |
@dataclass
|
| 86 |
class ModelDetails:
|
|
|
|
| 81 |
status = ColumnContent("status", "str", True)
|
| 82 |
|
| 83 |
|
| 84 |
+
@dataclass(frozen=True)
class EvalQueuedModel:
    """A single entry of the evaluation queue.

    Frozen (and therefore hashable) so entries can be kept in a set and
    compared to detect duplicate submissions.
    """

    # Repo id of the submitted model, e.g. "org/model-name".
    model: str
    # Git revision the evaluation was requested for (e.g. "main").
    revision: str
    precision: str
    add_special_tokens: str
    # Versions of the evaluation stack the request was made against.
    llm_jp_eval_version: str
    vllm_version: str
|
| 93 |
+
|
| 94 |
+
|
| 95 |
## All the model information that we might need
|
| 96 |
@dataclass
|
| 97 |
class ModelDetails:
|
src/envs.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import os
|
|
|
|
| 2 |
|
| 3 |
from huggingface_hub import HfApi
|
| 4 |
|
|
@@ -14,9 +15,9 @@ QUEUE_REPO = f"{OWNER}/leaderboard-requests"
|
|
| 14 |
CONTENTS_REPO = f"{OWNER}/leaderboard-contents"
|
| 15 |
|
| 16 |
# If you setup a cache later, just change HF_HOME
|
| 17 |
-
CACHE_PATH = os.getenv("HF_HOME", ".")
|
| 18 |
|
| 19 |
# Local caches
|
| 20 |
-
EVAL_REQUESTS_PATH =
|
| 21 |
|
| 22 |
API = HfApi(token=TOKEN)
|
|
|
|
| 1 |
import os
|
| 2 |
+
import pathlib
|
| 3 |
|
| 4 |
from huggingface_hub import HfApi
|
| 5 |
|
|
|
|
| 15 |
CONTENTS_REPO = f"{OWNER}/leaderboard-contents"
|
| 16 |
|
| 17 |
# If you setup a cache later, just change HF_HOME
|
| 18 |
+
CACHE_PATH = pathlib.Path(os.getenv("HF_HOME", "."))
|
| 19 |
|
| 20 |
# Local caches
|
| 21 |
+
EVAL_REQUESTS_PATH = CACHE_PATH / "eval-queue"
|
| 22 |
|
| 23 |
API = HfApi(token=TOKEN)
|
src/submission/check_validity.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
import json
|
| 2 |
import os
|
| 3 |
-
|
| 4 |
|
| 5 |
import huggingface_hub
|
| 6 |
import requests
|
|
@@ -9,7 +9,7 @@ from huggingface_hub.hf_api import ModelInfo
|
|
| 9 |
from transformers import AutoConfig
|
| 10 |
from transformers.models.auto.tokenization_auto import AutoTokenizer
|
| 11 |
|
| 12 |
-
from src.display.utils import
|
| 13 |
|
| 14 |
|
| 15 |
def check_model_card(repo_id: str) -> tuple[bool, str]:
|
|
@@ -92,34 +92,23 @@ def get_model_arch(model_info: ModelInfo):
|
|
| 92 |
return model_info.config.get("architectures", "Unknown")
|
| 93 |
|
| 94 |
|
| 95 |
-
def already_submitted_models(requested_models_dir:
|
| 96 |
"""Gather a list of already submitted models to avoid duplicates"""
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
file_names.append(
|
| 116 |
-
f"{info['model']}_{info['precision']}_{info['add_special_tokens']}_{version}_{vllm_version}"
|
| 117 |
-
)
|
| 118 |
-
|
| 119 |
-
# Select organisation
|
| 120 |
-
if info["model"].count("/") == 0 or "submitted_time" not in info:
|
| 121 |
-
continue
|
| 122 |
-
organisation, _ = info["model"].split("/")
|
| 123 |
-
users_to_submission_dates[organisation].append(info["submitted_time"])
|
| 124 |
-
|
| 125 |
-
return set(file_names), users_to_submission_dates
|
|
|
|
| 1 |
import json
|
| 2 |
import os
|
| 3 |
+
import pathlib
|
| 4 |
|
| 5 |
import huggingface_hub
|
| 6 |
import requests
|
|
|
|
| 9 |
from transformers import AutoConfig
|
| 10 |
from transformers.models.auto.tokenization_auto import AutoTokenizer
|
| 11 |
|
| 12 |
+
from src.display.utils import EvalQueuedModel
|
| 13 |
|
| 14 |
|
| 15 |
def check_model_card(repo_id: str) -> tuple[bool, str]:
|
|
|
|
| 92 |
return model_info.config.get("architectures", "Unknown")
|
| 93 |
|
| 94 |
|
| 95 |
+
def already_submitted_models(requested_models_dir: pathlib.Path) -> set[EvalQueuedModel]:
    """Gather a list of already submitted models to avoid duplicates"""

    def _load_request(path: pathlib.Path) -> dict:
        # Each request lives in <org>/<file>.json inside the queue directory.
        with path.open() as fp:
            return json.load(fp)

    entries = (_load_request(p) for p in requested_models_dir.glob("*/*.json"))
    # Failed submissions may be re-submitted, so they are excluded here.
    return {
        EvalQueuedModel(
            model=entry["model"],
            revision=entry["revision"],
            precision=entry["precision"],
            add_special_tokens=entry["add_special_tokens"],
            llm_jp_eval_version=entry["llm_jp_eval_version"],
            vllm_version=entry["vllm_version"],
        )
        for entry in entries
        if entry["status"] != "FAILED"
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/submission/submit.py
CHANGED
|
@@ -1,63 +1,60 @@
|
|
| 1 |
import json
|
| 2 |
-
import os
|
| 3 |
from datetime import datetime, timezone
|
| 4 |
|
| 5 |
from src.display.formatting import styled_error, styled_message, styled_warning
|
| 6 |
-
from src.display.utils import LLMJpEvalVersion, VllmVersion
|
| 7 |
from src.envs import API, EVAL_REQUESTS_PATH, QUEUE_REPO, TOKEN
|
| 8 |
from src.submission.check_validity import already_submitted_models, check_model_card, is_model_on_hub
|
| 9 |
|
| 10 |
-
REQUESTED_MODELS =
|
| 11 |
-
|
|
|
|
|
|
|
| 12 |
|
| 13 |
|
| 14 |
def add_new_eval(
|
| 15 |
-
|
| 16 |
revision: str,
|
| 17 |
precision: str,
|
| 18 |
model_type: str,
|
| 19 |
add_special_tokens: str,
|
| 20 |
):
|
| 21 |
global REQUESTED_MODELS
|
| 22 |
-
global USERS_TO_SUBMISSION_DATES
|
| 23 |
if not REQUESTED_MODELS:
|
| 24 |
-
REQUESTED_MODELS
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
|
|
|
| 44 |
current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
| 45 |
|
| 46 |
if model_type is None or model_type == "":
|
| 47 |
return styled_error("Please select a model type.")
|
| 48 |
|
| 49 |
-
# Does the model actually exist?
|
| 50 |
-
if revision == "":
|
| 51 |
-
revision = "main"
|
| 52 |
-
|
| 53 |
# Is the model on the hub?
|
| 54 |
-
model_on_hub, error, _ = is_model_on_hub(model_name=
|
| 55 |
if not model_on_hub:
|
| 56 |
-
return styled_error(f'Model "{
|
| 57 |
|
| 58 |
# Is the model info correctly filled?
|
| 59 |
try:
|
| 60 |
-
model_info = API.model_info(repo_id=
|
| 61 |
except Exception:
|
| 62 |
return styled_error("Could not get your model information. Please fill it up properly.")
|
| 63 |
|
|
@@ -67,7 +64,7 @@ def add_new_eval(
|
|
| 67 |
except Exception:
|
| 68 |
return styled_error("Please select a license for your model")
|
| 69 |
|
| 70 |
-
modelcard_OK, error_msg = check_model_card(
|
| 71 |
if not modelcard_OK:
|
| 72 |
return styled_error(error_msg)
|
| 73 |
|
|
@@ -76,37 +73,37 @@ def add_new_eval(
|
|
| 76 |
|
| 77 |
eval_entry = {
|
| 78 |
"model_type": model_type,
|
| 79 |
-
"model":
|
| 80 |
"precision": precision,
|
| 81 |
"revision": revision,
|
| 82 |
"add_special_tokens": add_special_tokens,
|
| 83 |
-
"llm_jp_eval_version":
|
| 84 |
-
"vllm_version":
|
| 85 |
"status": "PENDING",
|
| 86 |
"submitted_time": current_time,
|
| 87 |
}
|
| 88 |
|
| 89 |
print("Creating eval file")
|
| 90 |
-
OUT_DIR =
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
)
|
| 95 |
|
| 96 |
-
with open(
|
| 97 |
f.write(json.dumps(eval_entry))
|
| 98 |
|
| 99 |
print("Uploading eval file")
|
| 100 |
API.upload_file(
|
| 101 |
path_or_fileobj=out_path,
|
| 102 |
-
path_in_repo=out_path.
|
| 103 |
repo_id=QUEUE_REPO,
|
| 104 |
repo_type="dataset",
|
| 105 |
-
commit_message=f"Add {
|
| 106 |
)
|
|
|
|
| 107 |
|
| 108 |
# Remove the local file
|
| 109 |
-
|
| 110 |
|
| 111 |
return styled_message(
|
| 112 |
"Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
|
|
|
|
| 1 |
import json
|
|
|
|
| 2 |
from datetime import datetime, timezone
|
| 3 |
|
| 4 |
from src.display.formatting import styled_error, styled_message, styled_warning
|
| 5 |
+
from src.display.utils import EvalQueuedModel, LLMJpEvalVersion, VllmVersion
|
| 6 |
from src.envs import API, EVAL_REQUESTS_PATH, QUEUE_REPO, TOKEN
|
| 7 |
from src.submission.check_validity import already_submitted_models, check_model_card, is_model_on_hub
|
| 8 |
|
| 9 |
+
REQUESTED_MODELS: set[EvalQueuedModel] = set()
|
| 10 |
+
|
| 11 |
+
LLM_JP_EVAL_VERSION = LLMJpEvalVersion.current.value.name
|
| 12 |
+
VLLM_VERSION = VllmVersion.current.value.name
|
| 13 |
|
| 14 |
|
| 15 |
def add_new_eval(
|
| 16 |
+
model_id: str,
|
| 17 |
revision: str,
|
| 18 |
precision: str,
|
| 19 |
model_type: str,
|
| 20 |
add_special_tokens: str,
|
| 21 |
):
|
| 22 |
global REQUESTED_MODELS
|
|
|
|
| 23 |
if not REQUESTED_MODELS:
|
| 24 |
+
REQUESTED_MODELS = already_submitted_models(EVAL_REQUESTS_PATH)
|
| 25 |
+
|
| 26 |
+
revision = revision or "main"
|
| 27 |
+
|
| 28 |
+
model_data = EvalQueuedModel(
|
| 29 |
+
model=model_id,
|
| 30 |
+
revision=revision,
|
| 31 |
+
precision=precision,
|
| 32 |
+
add_special_tokens=add_special_tokens,
|
| 33 |
+
llm_jp_eval_version=LLM_JP_EVAL_VERSION,
|
| 34 |
+
vllm_version=VLLM_VERSION,
|
| 35 |
+
)
|
| 36 |
+
|
| 37 |
+
if model_data in REQUESTED_MODELS:
|
| 38 |
+
return styled_warning("This model has already been submitted with the same configuration.")
|
| 39 |
+
|
| 40 |
+
if "/" in model_id:
|
| 41 |
+
user_or_org, model_name = model_id.split("/")
|
| 42 |
+
else:
|
| 43 |
+
user_or_org, model_name = "", model_id
|
| 44 |
+
|
| 45 |
current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
| 46 |
|
| 47 |
if model_type is None or model_type == "":
|
| 48 |
return styled_error("Please select a model type.")
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
# Is the model on the hub?
|
| 51 |
+
model_on_hub, error, _ = is_model_on_hub(model_name=model_id, revision=revision, token=TOKEN, test_tokenizer=True)
|
| 52 |
if not model_on_hub:
|
| 53 |
+
return styled_error(f'Model "{model_id}" {error}')
|
| 54 |
|
| 55 |
# Is the model info correctly filled?
|
| 56 |
try:
|
| 57 |
+
model_info = API.model_info(repo_id=model_id, revision=revision)
|
| 58 |
except Exception:
|
| 59 |
return styled_error("Could not get your model information. Please fill it up properly.")
|
| 60 |
|
|
|
|
| 64 |
except Exception:
|
| 65 |
return styled_error("Please select a license for your model")
|
| 66 |
|
| 67 |
+
modelcard_OK, error_msg = check_model_card(model_id)
|
| 68 |
if not modelcard_OK:
|
| 69 |
return styled_error(error_msg)
|
| 70 |
|
|
|
|
| 73 |
|
| 74 |
eval_entry = {
|
| 75 |
"model_type": model_type,
|
| 76 |
+
"model": model_id,
|
| 77 |
"precision": precision,
|
| 78 |
"revision": revision,
|
| 79 |
"add_special_tokens": add_special_tokens,
|
| 80 |
+
"llm_jp_eval_version": LLM_JP_EVAL_VERSION,
|
| 81 |
+
"vllm_version": VLLM_VERSION,
|
| 82 |
"status": "PENDING",
|
| 83 |
"submitted_time": current_time,
|
| 84 |
}
|
| 85 |
|
| 86 |
print("Creating eval file")
|
| 87 |
+
OUT_DIR = EVAL_REQUESTS_PATH / user_or_org
|
| 88 |
+
OUT_DIR.mkdir(parents=True, exist_ok=True)
|
| 89 |
+
out_file_name = f"{model_name}_eval_request_False_{precision}_{add_special_tokens}_{VLLM_VERSION}.json"
|
| 90 |
+
out_path = OUT_DIR / out_file_name
|
|
|
|
| 91 |
|
| 92 |
+
with out_path.open("w") as f:
|
| 93 |
f.write(json.dumps(eval_entry))
|
| 94 |
|
| 95 |
print("Uploading eval file")
|
| 96 |
API.upload_file(
|
| 97 |
path_or_fileobj=out_path,
|
| 98 |
+
path_in_repo=out_path.relative_to(EVAL_REQUESTS_PATH).as_posix(),
|
| 99 |
repo_id=QUEUE_REPO,
|
| 100 |
repo_type="dataset",
|
| 101 |
+
commit_message=f"Add {model_id} to eval queue",
|
| 102 |
)
|
| 103 |
+
REQUESTED_MODELS.add(model_data)
|
| 104 |
|
| 105 |
# Remove the local file
|
| 106 |
+
out_path.unlink()
|
| 107 |
|
| 108 |
return styled_message(
|
| 109 |
"Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
|