# This module handles interfacing with the huggingface api
from datetime import datetime, timezone
from typing import Literal, Optional

from datasets import load_dataset, concatenate_datasets, Dataset, Features, Value
from datasets.exceptions import DatasetNotFoundError
from huggingface_hub import HfApi
from huggingface_hub.errors import RepositoryNotFoundError
# Shared Hugging Face API client used by every helper in this module.
api = HfApi()

# Private dataset repo that stores the leaderboard rows.
LEADERBOARD_ID = "KoelLabs/_IPA-TRANSCRIPTION-EN-SCORES"

# Benchmark datasets that each get a per-dataset FER column.
_FER_DATASETS = ("TIMIT", "EpaDB", "PSST", "SpeechOcean", "ISLE")

# Schema of one leaderboard row.
LEADERBOARD_FEATURES = Features(
    {
        "display_name": Value("string"),
        "repo_id": Value("string"),
        "repo_hash": Value("string"),
        "repo_last_modified": Value("timestamp[s, tz=UTC]"),
        "submission_timestamp": Value("timestamp[s, tz=UTC]"),
        "average_per": Value("float32"),
        "average_fer": Value("float32"),
        "url": Value("string"),
        **{f"fer_{name}": Value("float32") for name in _FER_DATASETS},
    }
)

# Fill values for columns that are missing from an older stored leaderboard.
LEADERBOARD_DEFAULTS = {
    "url": "",
    **{f"fer_{name}": None for name in _FER_DATASETS},
}
def get_repo_info(
    repo_id, type: Literal["model", "dataset", "space"] = "model"
) -> tuple[str, datetime]:
    """Return the (commit sha, last-modified time) of a Hub repo.

    Args:
        repo_id: Fully qualified repo id, e.g. "org/name".
        type: Kind of Hub repo to query. (The name shadows the builtin
            but is kept for backward compatibility with keyword callers.)

    Returns:
        ("", UTC epoch) when the repo does not exist, so callers can
        compare timestamps without special-casing missing repos.
    """
    try:
        repo_info = api.repo_info(repo_id=repo_id, repo_type=type)
        return repo_info.sha, repo_info.last_modified  # type: ignore
    except RepositoryNotFoundError:
        # Fix: the fallback must be timezone-aware like the datetimes the
        # Hub API returns; a naive datetime(1970, 1, 1) raises TypeError
        # the moment a caller compares it against a real last_modified.
        return "", datetime(year=1970, month=1, day=1, tzinfo=timezone.utc)
def get_or_create_leaderboard() -> Dataset:
    """Load the leaderboard dataset, creating or migrating it if needed.

    Builds an empty dataset when the repo is missing or unreadable,
    back-fills any column added to the schema after older rows were
    stored (using LEADERBOARD_DEFAULTS), and pushes back to the Hub
    only when something actually changed.
    """
    modified = False
    try:
        dataset: Dataset = load_dataset(LEADERBOARD_ID)["train"]  # type: ignore
    except (DatasetNotFoundError, ValueError):
        # Fix: the two except branches were duplicated and the ValueError
        # branch forgot modified = True, so a freshly created empty
        # leaderboard was never pushed to the Hub in that path.
        empty_data = {col: [] for col in LEADERBOARD_FEATURES.keys()}
        dataset = Dataset.from_dict(empty_data, features=LEADERBOARD_FEATURES)
        modified = True
    # Schema migration: add any column introduced after these rows were stored.
    for col in LEADERBOARD_FEATURES.keys():
        if col not in dataset.column_names:
            modified = True
            dataset = dataset.add_column(col, [LEADERBOARD_DEFAULTS.get(col)] * len(dataset))  # type: ignore
            dataset = dataset.cast_column(col, feature=LEADERBOARD_FEATURES[col])
    if modified:
        dataset.push_to_hub(LEADERBOARD_ID, private=True)
    return dataset
def add_leaderboard_entry(
    display_name: str,
    repo_id: str,
    repo_hash: str,
    repo_last_modified: datetime,
    submission_timestamp: datetime,
    average_per: float,
    average_fer: float,
    url: str,
    per_dataset_fers: Optional[dict] = None,
):
    """Append one scored submission as a new row and push the leaderboard.

    Args:
        display_name: Human-readable model name shown on the leaderboard.
        repo_id: Hub repo id of the submitted model.
        repo_hash: Commit sha of the submitted revision.
        repo_last_modified: Last-modified time of the submitted repo.
        submission_timestamp: When this submission was scored.
        average_per: Average phoneme error rate across datasets.
        average_fer: Average feature error rate across datasets.
        url: Optional link associated with the submission.
        per_dataset_fers: Optional mapping of dataset name -> FER; missing
            keys are stored as null.
    """
    # Fix: avoid the mutable default argument `{}` — a shared dict across
    # all calls. None is the safe sentinel; normalize it here.
    if per_dataset_fers is None:
        per_dataset_fers = {}
    existing_dataset = get_or_create_leaderboard()
    new_row = Dataset.from_dict(
        dict(
            display_name=[display_name],
            repo_id=[repo_id],
            repo_hash=[repo_hash],
            # Truncate to whole seconds to match the timestamp[s] features.
            repo_last_modified=[repo_last_modified.replace(microsecond=0)],
            submission_timestamp=[submission_timestamp.replace(microsecond=0)],
            average_per=[average_per],
            average_fer=[average_fer],
            url=[url],
            fer_TIMIT=[per_dataset_fers.get("TIMIT")],
            fer_EpaDB=[per_dataset_fers.get("EpaDB")],
            fer_PSST=[per_dataset_fers.get("PSST")],
            fer_SpeechOcean=[per_dataset_fers.get("SpeechOcean")],
            fer_ISLE=[per_dataset_fers.get("ISLE")],
        ),
        features=LEADERBOARD_FEATURES,
    )
    combined_dataset = concatenate_datasets([existing_dataset, new_row])
    combined_dataset.push_to_hub(LEADERBOARD_ID, private=True)
if __name__ == "__main__":
    # Quick smoke check: show the leaderboard repo's revision info and a
    # preview of its first few rows.
    repo_details = get_repo_info(LEADERBOARD_ID, type="dataset")
    print(repo_details)
    leaderboard = get_or_create_leaderboard()
    print(leaderboard.to_pandas().head(5))  # type: ignore