Spaces: Running on CPU Upgrade
t0-0 committed
Commit · cf10aa9
1 Parent(s): 567d2b9
Add information to Task
- src/about.py +82 -53
- src/display/utils.py +12 -2
src/about.py
CHANGED
@@ -2,78 +2,107 @@ from dataclasses import dataclass
 from enum import Enum


+class TaskType(Enum):
+    ALL = "ALL"
+    NLI = "NLI"
+    QA = "QA"
+    RC = "RC"
+    MC = "MC"
+    EL = "EL"
+    FA = "FA"
+    MR = "MR"
+    MT = "MT"
+    STS = "STS"
+    HE = "HE"
+    CG = "CG"
+    SUM = "SUM"
+    NotTask = "?"
+
+
 @dataclass
 class Task:
     benchmark: str
     metric: str
     col_name: str
+    task_type: TaskType
+    average: bool = False


 # Select your tasks here
 # ---------------------------------------------------
 class Tasks(Enum):
-    AVG = Task("scores", "AVG", "AVG")
-    CG = Task("scores", "CG", "CG - コード生成")  # Code Generation - コード生成
-    EL = Task(
-        ...
+    AVG = Task("scores", "AVG", "AVG", TaskType.ALL, True)
+    CG = Task("scores", "CG", "CG - コード生成", TaskType.CG, True)  # Code Generation - コード生成
+    EL = Task(
+        "scores", "EL", "EL - エンティティリンキング", TaskType.EL, True
+    )  # Entity Linking - エンティティリンキング
+    FA = Task("scores", "FA", "FA - 基礎分析", TaskType.FA, True)  # Fundamental Analysis - 基礎分析
+    HE = Task("scores", "HE", "HE - 言語理解", TaskType.HE, True)  # Human Examination - 言語理解
+    MC = Task(
+        "scores", "MC", "MC - 多肢選択式質問応答", TaskType.MC, True
+    )  # Multiple Choice question answering - 多肢選択式質問応答
+    MR = Task("scores", "MR", "MR - 数学的推論", TaskType.MR, True)  # Mathematical Reasoning - 数学的推論
+    MT = Task("scores", "MT", "MT - 機械翻訳", TaskType.MT, True)  # Machine Translation - 機械翻訳
+    NLI = Task("scores", "NLI", "NLI - 自然言語推論", TaskType.NLI, True)  # Natural Language Inference - 自然言語推論
+    QA = Task("scores", "QA", "QA - 質問応答", TaskType.QA, True)  # Question Answering - 質問応答
+    RC = Task("scores", "RC", "RC - 読解力", TaskType.RC, True)  # Reading Comprehension - 読解力
+    SUM = Task("scores", "SUM", "SUM - 要約", TaskType.SUM, True)  # Summarization - 要約
+    alt_e_to_j_bert_score_ja_f1 = Task("scores", "alt-e-to-j_bert_score_ja_f1", "ALT E to J BERT Score", TaskType.MT)
+    alt_e_to_j_bleu_ja = Task("scores", "alt-e-to-j_bleu_ja", "ALT E to J BLEU", TaskType.MT)
+    alt_e_to_j_comet_wmt22 = Task("scores", "alt-e-to-j_comet_wmt22", "ALT E to J COMET WMT22", TaskType.MT)
+    alt_j_to_e_bert_score_en_f1 = Task("scores", "alt-j-to-e_bert_score_en_f1", "ALT J to E BERT Score", TaskType.MT)
+    alt_j_to_e_bleu_en = Task("scores", "alt-j-to-e_bleu_en", "ALT J to E BLEU", TaskType.MT)
+    alt_j_to_e_comet_wmt22 = Task("scores", "alt-j-to-e_comet_wmt22", "ALT J to E COMET WMT22", TaskType.MT)
+    chabsa_set_f1 = Task("scores", "chabsa_set_f1", "ChABSA", TaskType.EL)
+    commonsensemoralja_exact_match = Task(
+        "scores", "commonsensemoralja_exact_match", "CommonSenseMoralJA", TaskType.MC
+    )
+    jamp_exact_match = Task("scores", "jamp_exact_match", "JAMP", TaskType.NLI)
+    janli_exact_match = Task("scores", "janli_exact_match", "JANLI", TaskType.NLI)
+    jcommonsenseqa_exact_match = Task("scores", "jcommonsenseqa_exact_match", "JCommonSenseQA", TaskType.MC)
+    jemhopqa_char_f1 = Task("scores", "jemhopqa_char_f1", "JEMHopQA", TaskType.QA)
+    jmmlu_exact_match = Task("scores", "jmmlu_exact_match", "JMMLU", TaskType.HE)
+    jnli_exact_match = Task("scores", "jnli_exact_match", "JNLI", TaskType.NLI)
+    jsem_exact_match = Task("scores", "jsem_exact_match", "JSEM", TaskType.NLI)
+    jsick_exact_match = Task("scores", "jsick_exact_match", "JSICK", TaskType.NLI)
+    jsquad_char_f1 = Task("scores", "jsquad_char_f1", "JSquad", TaskType.RC)
     jsts_pearson = Task(
-        "scores", "jsts_pearson", "JSTS (Pearson) - 意味的類似度"
+        "scores", "jsts_pearson", "JSTS (Pearson) - 意味的類似度", TaskType.STS
     )  # Semantic Textual Similarity - 意味的類似度
     jsts_spearman = Task(
-        "scores", "jsts_spearman", "JSTS (Spearman) - 意味的類似度"
+        "scores", "jsts_spearman", "JSTS (Spearman) - 意味的類似度", TaskType.STS
     )  # Semantic Textual Similarity - 意味的類似度
-    kuci_exact_match = Task("scores", "kuci_exact_match", "KUCI")
-    mawps_exact_match = Task("scores", "mawps_exact_match", "MAWPS")
-    mbpp_code_exec = Task("scores", "mbpp_code_exec", "MBPP(exec)")
-    mbpp_pylint_check = Task("scores", "mbpp_pylint_check", "MBPP(pylint)")
-    mmlu_en_exact_match = Task("scores", "mmlu_en_exact_match", "MMLU")
-    niilc_char_f1 = Task("scores", "niilc_char_f1", "NIILC")
-    wiki_coreference_set_f1 = Task("scores", "wiki_coreference_set_f1", "Wiki Coreference")
-    wiki_dependency_set_f1 = Task("scores", "wiki_dependency_set_f1", "Wiki Dependency")
-    wiki_ner_set_f1 = Task("scores", "wiki_ner_set_f1", "Wiki NER")
-    wiki_pas_set_f1 = Task("scores", "wiki_pas_set_f1", "Wiki PAS")
-    wiki_reading_char_f1 = Task("scores", "wiki_reading_char_f1", "Wiki Reading")
+    kuci_exact_match = Task("scores", "kuci_exact_match", "KUCI", TaskType.MC)
+    mawps_exact_match = Task("scores", "mawps_exact_match", "MAWPS", TaskType.MR)
+    mbpp_code_exec = Task("scores", "mbpp_code_exec", "MBPP(exec)", TaskType.CG)
+    mbpp_pylint_check = Task("scores", "mbpp_pylint_check", "MBPP(pylint)", TaskType.CG)
+    mmlu_en_exact_match = Task("scores", "mmlu_en_exact_match", "MMLU", TaskType.HE)
+    niilc_char_f1 = Task("scores", "niilc_char_f1", "NIILC", TaskType.QA)
+    wiki_coreference_set_f1 = Task("scores", "wiki_coreference_set_f1", "Wiki Coreference", TaskType.FA)
+    wiki_dependency_set_f1 = Task("scores", "wiki_dependency_set_f1", "Wiki Dependency", TaskType.FA)
+    wiki_ner_set_f1 = Task("scores", "wiki_ner_set_f1", "Wiki NER", TaskType.FA)
+    wiki_pas_set_f1 = Task("scores", "wiki_pas_set_f1", "Wiki PAS", TaskType.FA)
+    wiki_reading_char_f1 = Task("scores", "wiki_reading_char_f1", "Wiki Reading", TaskType.FA)
     wikicorpus_e_to_j_bert_score_ja_f1 = Task(
-        "scores", "wikicorpus-e-to-j_bert_score_ja_f1", "WikiCorpus E to J BERT Score"
+        "scores", "wikicorpus-e-to-j_bert_score_ja_f1", "WikiCorpus E to J BERT Score", TaskType.MT
+    )
+    wikicorpus_e_to_j_bleu_ja = Task("scores", "wikicorpus-e-to-j_bleu_ja", "WikiCorpus E to J BLEU", TaskType.MT)
+    wikicorpus_e_to_j_comet_wmt22 = Task(
+        "scores", "wikicorpus-e-to-j_comet_wmt22", "WikiCorpus E to J COMET WMT22", TaskType.MT
     )
-    wikicorpus_e_to_j_bleu_ja = Task("scores", "wikicorpus-e-to-j_bleu_ja", "WikiCorpus E to J BLEU")
-    wikicorpus_e_to_j_comet_wmt22 = Task("scores", "wikicorpus-e-to-j_comet_wmt22", "WikiCorpus E to J COMET WMT22")
     wikicorpus_j_to_e_bert_score_en_f1 = Task(
-        "scores", "wikicorpus-j-to-e_bert_score_en_f1", "WikiCorpus J to E BERT Score"
+        "scores", "wikicorpus-j-to-e_bert_score_en_f1", "WikiCorpus J to E BERT Score", TaskType.MT
+    )
+    wikicorpus_j_to_e_bleu_en = Task("scores", "wikicorpus-j-to-e_bleu_en", "WikiCorpus J to E BLEU", TaskType.MT)
+    wikicorpus_j_to_e_comet_wmt22 = Task(
+        "scores", "wikicorpus-j-to-e_comet_wmt22", "WikiCorpus J to E COMET WMT22", TaskType.MT
     )
-    ...
-    xlsum_ja_rouge1 = Task("scores", "xlsum_ja_rouge1", "XL-Sum ROUGE1")
-    xlsum_ja_rouge2 = Task("scores", "xlsum_ja_rouge2", "XL-Sum ROUGE2")
+    xlsum_ja_bert_score_ja_f1 = Task("scores", "xlsum_ja_bert_score_ja_f1", "XL-Sum JA BERT Score", TaskType.SUM)
+    xlsum_ja_bleu_ja = Task("scores", "xlsum_ja_bleu_ja", "XL-Sum JA BLEU", TaskType.SUM)
+    xlsum_ja_rouge1 = Task("scores", "xlsum_ja_rouge1", "XL-Sum ROUGE1", TaskType.SUM)
+    xlsum_ja_rouge2 = Task("scores", "xlsum_ja_rouge2", "XL-Sum ROUGE2", TaskType.SUM)
     # xlsum_ja_rouge2_scaling = Task("scores", "xlsum_ja_rouge2_scaling", "XL-Sum JA ROUGE2 Scaling")
-    xlsum_ja_rougeLsum = Task("scores", "xlsum_ja_rougeLsum", "XL-Sum ROUGE-Lsum")
+    xlsum_ja_rougeLsum = Task("scores", "xlsum_ja_rougeLsum", "XL-Sum ROUGE-Lsum", TaskType.SUM)


 NUM_FEWSHOT = 0  # Change with your few shot
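With task_type and an average flag now attached to every Tasks entry, each benchmark is self-describing. As a rough illustration of how that metadata could be consumed (this helper is hypothetical, not part of the commit; it only assumes src.about is importable inside the Space), per-benchmark columns can be grouped by category while the aggregate AVG/CG/... entries are skipped:

from collections import defaultdict

from src.about import Tasks, TaskType

def tasks_by_type() -> dict[TaskType, list[str]]:
    # Hypothetical helper: map each task category to the display names of its
    # benchmark columns, skipping the aggregate entries that have average=True.
    groups: dict[TaskType, list[str]] = defaultdict(list)
    for task in Tasks:
        if task.value.average:
            continue
        groups[task.value.task_type].append(task.value.col_name)
    return groups

For example, tasks_by_type()[TaskType.MT] would collect the ALT and WikiCorpus translation columns.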
src/display/utils.py
CHANGED
@@ -3,7 +3,7 @@ from enum import Enum
 
 import pandas as pd
 
-from src.about import Tasks
+from src.about import Tasks, TaskType
 
 
 def fields(raw_class):
@@ -21,6 +21,8 @@ class ColumnContent:
     hidden: bool = False
     never_hidden: bool = False
     dummy: bool = False
+    task_type: TaskType = TaskType.NotTask
+    average: bool = False
 
 
 ## Leaderboard columns
@@ -31,7 +33,15 @@ auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "ma
 # Scores
 # auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
 for task in Tasks:
-    auto_eval_column_dict.append(
+    auto_eval_column_dict.append(
+        [
+            task.name,
+            ColumnContent,
+            ColumnContent(
+                task.value.col_name, "number", True, task_type=task.value.task_type, average=task.value.average
+            ),
+        ]
+    )
 # Model information
 auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
 auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
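Because every score column now records its task_type and whether it is an aggregate, display code can filter columns by category instead of hard-coding lists. A minimal sketch, not part of this commit, assuming the stock leaderboard layout in which each auto_eval_column_dict entry is [attribute name, type, ColumnContent] and ColumnContent's first field is the displayed column name:

def score_columns_for(task_type: TaskType) -> list[str]:
    # Hypothetical helper: display names of the score columns in one task category,
    # excluding aggregate columns; model-information columns keep the default
    # TaskType.NotTask and therefore never match a real category.
    return [
        content.name
        for _attr, _type, content in auto_eval_column_dict
        if content.task_type == task_type and not content.average
    ]

For example, score_columns_for(TaskType.NLI) would pick out the JAMP, JANLI, JNLI, JSEM, and JSICK columns.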