Commit 817663f · Parent(s): 96fcd80

Add Polish Overall

app.py CHANGED
@@ -57,6 +57,16 @@ TASK_LIST_CLASSIFICATION_NB = [
     "ScalaNbClassification",
 ]
 
+TASK_LIST_CLASSIFICATION_PL = [
+    "AbusiveClauses",
+    "AllegroReviews",
+    "CBD",
+    "MassiveIntentClassification (pl)",
+    "MassiveScenarioClassification (pl)",
+    "PolEmo2.0-IN",
+    "PolEmo2.0-OUT",
+]
+
 TASK_LIST_CLASSIFICATION_SV = [
     "DalajClassification",
     "MassiveIntentClassification (sv)",

@@ -102,6 +112,10 @@ TASK_LIST_CLUSTERING_DE = [
     "TenKGnadClusteringS2S",
 ]
 
+TASK_LIST_CLUSTERING_PL = [
+    "8TagsClustering",
+]
+
 TASK_LIST_CLUSTERING_ZH = [
     "CLSClusteringP2P",
     "CLSClusteringS2S",

@@ -115,6 +129,13 @@ TASK_LIST_PAIR_CLASSIFICATION = [
     "TwitterURLCorpus",
 ]
 
+TASK_LIST_PAIR_CLASSIFICATION_PL = [
+    "CDSC-E",
+    "PPC",
+    "PSC",
+    "SICK-E-PL",
+]
+
 TASK_LIST_PAIR_CLASSIFICATION_ZH = [
     "Cmnli",
     "Ocnli",

@@ -205,6 +226,12 @@ TASK_LIST_STS = [
     "STSBenchmark",
 ]
 
+TASK_LIST_STS_PL = [
+    "CDSC-R",
+    "SICK-R-PL",
+    "STS22 (pl)",
+]
+
 TASK_LIST_STS_ZH = [
     "AFQMC",
     "ATEC",

@@ -222,6 +249,7 @@ TASK_LIST_STS_NORM = [x.replace(" (en)", "").replace(" (en-en)", "") for x in TA
 TASK_LIST_SUMMARIZATION = ["SummEval",]
 
 TASK_LIST_EN = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS + TASK_LIST_SUMMARIZATION
+TASK_LIST_PL = TASK_LIST_CLASSIFICATION_PL + TASK_LIST_CLUSTERING_PL + TASK_LIST_PAIR_CLASSIFICATION_PL + TASK_LIST_RETRIEVAL_PL + TASK_LIST_STS_PL
 TASK_LIST_ZH = TASK_LIST_CLASSIFICATION_ZH + TASK_LIST_CLUSTERING_ZH + TASK_LIST_PAIR_CLASSIFICATION_ZH + TASK_LIST_RERANKING_ZH + TASK_LIST_RETRIEVAL_ZH + TASK_LIST_STS_ZH
 
 TASK_TO_METRIC = {
@@ -298,6 +326,8 @@ EXTERNAL_MODELS = [
     "sentence-t5-xl",
     "sentence-t5-xxl",
     "sup-simcse-bert-base-uncased",
+    "st-polish-paraphrase-from-distilroberta",
+    "st-polish-paraphrase-from-mpnet",
     "text2vec-base-chinese",
     "text2vec-large-chinese",
     "text-embedding-ada-002",

@@ -371,6 +401,8 @@ EXTERNAL_MODEL_TO_LINK = {
     "sentence-t5-xl": "https://huggingface.co/sentence-transformers/sentence-t5-xl",
     "sentence-t5-xxl": "https://huggingface.co/sentence-transformers/sentence-t5-xxl",
     "sup-simcse-bert-base-uncased": "https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased",
+    "st-polish-paraphrase-from-distilroberta": "https://huggingface.co/sdadas/st-polish-paraphrase-from-distilroberta",
+    "st-polish-paraphrase-from-mpnet": "https://huggingface.co/sdadas/st-polish-paraphrase-from-mpnet",
     "text2vec-base-chinese": "https://huggingface.co/shibing624/text2vec-base-chinese",
     "text2vec-large-chinese": "https://huggingface.co/GanymedeNil/text2vec-large-chinese",
     "text-embedding-ada-002": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",

@@ -444,6 +476,8 @@ EXTERNAL_MODEL_TO_DIM = {
     "sentence-t5-xl": 768,
     "sentence-t5-xxl": 768,
     "sup-simcse-bert-base-uncased": 768,
+    "st-polish-paraphrase-from-distilroberta": 768,
+    "st-polish-paraphrase-from-mpnet": 768,
     "text2vec-base-chinese": 768,
     "text2vec-large-chinese": 1024,
     "text-embedding-ada-002": 1536,

@@ -517,6 +551,8 @@ EXTERNAL_MODEL_TO_SEQLEN = {
     "sentence-t5-xl": 512,
     "sentence-t5-xxl": 512,
     "sup-simcse-bert-base-uncased": 512,
+    "st-polish-paraphrase-from-distilroberta": 514,
+    "st-polish-paraphrase-from-mpnet": 514,
     "text2vec-base-chinese": 512,
     "text2vec-large-chinese": 512,
     "text-embedding-ada-002": 8191,

@@ -590,6 +626,8 @@ EXTERNAL_MODEL_TO_SIZE = {
     "sentence-t5-xl": 2.48,
     "sentence-t5-xxl": 9.73,
     "sup-simcse-bert-base-uncased": 0.44,
+    "st-polish-paraphrase-from-distilroberta": 0.50,
+    "st-polish-paraphrase-from-mpnet": 0.50,
     "text2vec-base-chinese": 0.41,
     "text2vec-large-chinese": 1.30,
     "unsup-simcse-bert-base-uncased": 0.44,
@@ -621,6 +659,7 @@ MODELS_TO_SKIP = {
     "dmlls/all-mpnet-base-v2",
     "cgldo/semanticClone",
     "Malmuk1/e5-large-v2_Sharded",
+    "jncraton/gte-small-ct2-int8",
 }
 
 EXTERNAL_MODEL_RESULTS = {model: {k: {v: []} for k, v in TASK_TO_METRIC.items()} for model in EXTERNAL_MODELS}
@@ -634,17 +673,17 @@ def add_lang(examples):
 
 def add_task(examples):
     # Could be added to the dataset loading script instead
-    if examples["mteb_dataset_name"] in TASK_LIST_CLASSIFICATION_NORM + TASK_LIST_CLASSIFICATION_DA + TASK_LIST_CLASSIFICATION_NB + TASK_LIST_CLASSIFICATION_SV + TASK_LIST_CLASSIFICATION_ZH:
+    if examples["mteb_dataset_name"] in TASK_LIST_CLASSIFICATION_NORM + TASK_LIST_CLASSIFICATION_DA + TASK_LIST_CLASSIFICATION_NB + TASK_LIST_CLASSIFICATION_PL + TASK_LIST_CLASSIFICATION_SV + TASK_LIST_CLASSIFICATION_ZH:
         examples["mteb_task"] = "Classification"
-    elif examples["mteb_dataset_name"] in TASK_LIST_CLUSTERING + TASK_LIST_CLUSTERING_DE + TASK_LIST_CLUSTERING_ZH:
+    elif examples["mteb_dataset_name"] in TASK_LIST_CLUSTERING + TASK_LIST_CLUSTERING_DE + TASK_LIST_CLUSTERING_PL + TASK_LIST_CLUSTERING_ZH:
         examples["mteb_task"] = "Clustering"
-    elif examples["mteb_dataset_name"] in TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_PAIR_CLASSIFICATION_ZH:
+    elif examples["mteb_dataset_name"] in TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_PAIR_CLASSIFICATION_PL + TASK_LIST_PAIR_CLASSIFICATION_ZH:
         examples["mteb_task"] = "PairClassification"
     elif examples["mteb_dataset_name"] in TASK_LIST_RERANKING + TASK_LIST_RERANKING_ZH:
         examples["mteb_task"] = "Reranking"
     elif examples["mteb_dataset_name"] in TASK_LIST_RETRIEVAL_NORM + TASK_LIST_RETRIEVAL_PL + TASK_LIST_RETRIEVAL_ZH:
         examples["mteb_task"] = "Retrieval"
-    elif examples["mteb_dataset_name"] in TASK_LIST_STS_NORM + TASK_LIST_STS_ZH:
+    elif examples["mteb_dataset_name"] in TASK_LIST_STS_NORM + TASK_LIST_STS_PL + TASK_LIST_STS_ZH:
         examples["mteb_task"] = "STS"
     elif examples["mteb_dataset_name"] in TASK_LIST_SUMMARIZATION:
         examples["mteb_task"] = "Summarization"
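For orientation, the add_task change above only extends the dataset-name to task-type dispatch to the new Polish lists. A minimal, self-contained sketch of that routing (route_task is a hypothetical stand-in for add_task; the list contents are copied from the hunks above):

# Hypothetical stand-in mirroring the add_task dispatch added in this commit.
TASK_LIST_CLASSIFICATION_PL = ["AbusiveClauses", "AllegroReviews", "CBD",
                               "MassiveIntentClassification (pl)",
                               "MassiveScenarioClassification (pl)",
                               "PolEmo2.0-IN", "PolEmo2.0-OUT"]
TASK_LIST_CLUSTERING_PL = ["8TagsClustering"]
TASK_LIST_PAIR_CLASSIFICATION_PL = ["CDSC-E", "PPC", "PSC", "SICK-E-PL"]
TASK_LIST_STS_PL = ["CDSC-R", "SICK-R-PL", "STS22 (pl)"]

def route_task(example: dict) -> dict:
    # Map a result row's dataset name to the task family shown in the leaderboard.
    name = example["mteb_dataset_name"]
    if name in TASK_LIST_CLASSIFICATION_PL:
        example["mteb_task"] = "Classification"
    elif name in TASK_LIST_CLUSTERING_PL:
        example["mteb_task"] = "Clustering"
    elif name in TASK_LIST_PAIR_CLASSIFICATION_PL:
        example["mteb_task"] = "PairClassification"
    elif name in TASK_LIST_STS_PL:
        example["mteb_task"] = "STS"
    return example

assert route_task({"mteb_dataset_name": "PolEmo2.0-IN"})["mteb_task"] == "Classification"
assert route_task({"mteb_dataset_name": "8TagsClustering"})["mteb_task"] == "Clustering"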
@@ -915,7 +954,62 @@ get_mteb_average_zh():
 
     return DATA_OVERALL_ZH
 
+def get_mteb_average_pl():
+    global DATA_OVERALL_PL, DATA_CLASSIFICATION_PL, DATA_CLUSTERING_PL, DATA_PAIR_CLASSIFICATION_PL, DATA_RETRIEVAL_PL, DATA_STS_PL
+    DATA_OVERALL_PL = get_mteb_data(
+        tasks=[
+            "Classification",
+            "Clustering",
+            "PairClassification",
+            "Retrieval",
+            "STS",
+        ],
+        datasets=TASK_LIST_CLASSIFICATION_PL + TASK_LIST_CLUSTERING_PL + TASK_LIST_PAIR_CLASSIFICATION_PL + TASK_LIST_RETRIEVAL_PL + TASK_LIST_STS_PL,
+        fillna=False,
+        add_emb_dim=True,
+        rank=False,
+    )
+    # Debugging:
+    # DATA_OVERALL_PL.to_csv("overall.csv")
+
+    DATA_OVERALL_PL.insert(1, f"Average ({len(TASK_LIST_PL)} datasets)", DATA_OVERALL_PL[TASK_LIST_PL].mean(axis=1, skipna=False))
+    DATA_OVERALL_PL.insert(2, f"Classification Average ({len(TASK_LIST_CLASSIFICATION_PL)} datasets)", DATA_OVERALL_PL[TASK_LIST_CLASSIFICATION_PL].mean(axis=1, skipna=False))
+    DATA_OVERALL_PL.insert(3, f"Clustering Average ({len(TASK_LIST_CLUSTERING_PL)} datasets)", DATA_OVERALL_PL[TASK_LIST_CLUSTERING_PL].mean(axis=1, skipna=False))
+    DATA_OVERALL_PL.insert(4, f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_PL)} datasets)", DATA_OVERALL_PL[TASK_LIST_PAIR_CLASSIFICATION_PL].mean(axis=1, skipna=False))
+    DATA_OVERALL_PL.insert(5, f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_PL)} datasets)", DATA_OVERALL_PL[TASK_LIST_RETRIEVAL_PL].mean(axis=1, skipna=False))
+    DATA_OVERALL_PL.insert(6, f"STS Average ({len(TASK_LIST_STS_PL)} datasets)", DATA_OVERALL_PL[TASK_LIST_STS_PL].mean(axis=1, skipna=False))
+    DATA_OVERALL_PL.sort_values(f"Average ({len(TASK_LIST_PL)} datasets)", ascending=False, inplace=True)
+    # Start ranking from 1
+    DATA_OVERALL_PL.insert(0, "Rank", list(range(1, len(DATA_OVERALL_PL) + 1)))
+
+    DATA_OVERALL_PL = DATA_OVERALL_PL.round(2)
+
+    DATA_CLASSIFICATION_PL = add_rank(DATA_OVERALL_PL[["Model"] + TASK_LIST_CLASSIFICATION_PL])
+    # Only keep rows with at least one score in addition to the "Model" & rank column
+    DATA_CLASSIFICATION_PL = DATA_CLASSIFICATION_PL[DATA_CLASSIFICATION_PL.iloc[:, 2:].ne("").any(axis=1)]
+
+    DATA_CLUSTERING_PL = add_rank(DATA_OVERALL_PL[["Model"] + TASK_LIST_CLUSTERING_PL])
+    DATA_CLUSTERING_PL = DATA_CLUSTERING_PL[DATA_CLUSTERING_PL.iloc[:, 2:].ne("").any(axis=1)]
+
+    DATA_PAIR_CLASSIFICATION_PL = add_rank(DATA_OVERALL_PL[["Model"] + TASK_LIST_PAIR_CLASSIFICATION_PL])
+    DATA_PAIR_CLASSIFICATION_PL = DATA_PAIR_CLASSIFICATION_PL[DATA_PAIR_CLASSIFICATION_PL.iloc[:, 2:].ne("").any(axis=1)]
+
+    DATA_RETRIEVAL_PL = add_rank(DATA_OVERALL_PL[["Model"] + TASK_LIST_RETRIEVAL_PL])
+    DATA_RETRIEVAL_PL = DATA_RETRIEVAL_PL[DATA_RETRIEVAL_PL.iloc[:, 2:].ne("").any(axis=1)]
+
+    DATA_STS_PL = add_rank(DATA_OVERALL_PL[["Model"] + TASK_LIST_STS_PL])
+    DATA_STS_PL = DATA_STS_PL[DATA_STS_PL.iloc[:, 2:].ne("").any(axis=1)]
+
+    # Fill NaN after averaging
+    DATA_OVERALL_PL.fillna("", inplace=True)
+
+    DATA_OVERALL_PL = DATA_OVERALL_PL[["Rank", "Model", "Model Size (GB)", "Embedding Dimensions", "Sequence Length", f"Average ({len(TASK_LIST_PL)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION_PL)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING_PL)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_PL)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_PL)} datasets)", f"STS Average ({len(TASK_LIST_STS_PL)} datasets)"]]
+    DATA_OVERALL_PL = DATA_OVERALL_PL[DATA_OVERALL_PL.iloc[:, 5:].ne("").any(axis=1)]
+
+    return DATA_OVERALL_PL
+
 get_mteb_average()
+get_mteb_average_pl()
 get_mteb_average_zh()
 DATA_BITEXT_MINING = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING)
 DATA_BITEXT_MINING_OTHER = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING_OTHER)
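The new get_mteb_average_pl follows the same pattern as the existing per-language builders: column-wise means with skipna=False for each task family, a sort on the overall average, and a Rank column inserted at position 0. A rough, self-contained pandas sketch of that pattern on invented toy scores (column values here are made up for illustration):

import pandas as pd

# Toy scores for two hypothetical models on two of the new Polish datasets.
df = pd.DataFrame({
    "Model": ["model-a", "model-b"],
    "PolEmo2.0-IN": [68.0, 72.0],
    "8TagsClustering": [30.0, 33.0],
})
task_cols = ["PolEmo2.0-IN", "8TagsClustering"]

# Same idea as DATA_OVERALL_PL: skipna=False keeps the average NaN
# unless a model has results on every dataset in the list.
df.insert(1, f"Average ({len(task_cols)} datasets)", df[task_cols].mean(axis=1, skipna=False))
df.sort_values(f"Average ({len(task_cols)} datasets)", ascending=False, inplace=True)
# Start ranking from 1, like the leaderboard does.
df.insert(0, "Rank", list(range(1, len(df) + 1)))
print(df.round(2))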
@@ -924,7 +1018,6 @@ DATA_CLASSIFICATION_NB = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIF
 DATA_CLASSIFICATION_SV = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_SV)
 DATA_CLASSIFICATION_OTHER = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_OTHER)
 DATA_CLUSTERING_DE = get_mteb_data(["Clustering"], [], TASK_LIST_CLUSTERING_DE)
-DATA_RETRIEVAL_PL = get_mteb_data(["Retrieval"], [], TASK_LIST_RETRIEVAL_PL)
 DATA_STS_OTHER = get_mteb_data(["STS"], [], TASK_LIST_STS_OTHER)
 
 # Exact, add all non-nan integer values for every dataset
@@ -938,19 +1031,24 @@ for d in [
     DATA_CLASSIFICATION_EN,
     DATA_CLASSIFICATION_DA,
     DATA_CLASSIFICATION_NB,
+    DATA_CLASSIFICATION_PL,
     DATA_CLASSIFICATION_SV,
     DATA_CLASSIFICATION_ZH,
     DATA_CLASSIFICATION_OTHER,
     DATA_CLUSTERING,
     DATA_CLUSTERING_DE,
+    DATA_CLUSTERING_PL,
     DATA_CLUSTERING_ZH,
     DATA_PAIR_CLASSIFICATION,
+    DATA_PAIR_CLASSIFICATION_PL,
     DATA_PAIR_CLASSIFICATION_ZH,
     DATA_RERANKING,
     DATA_RERANKING_ZH,
     DATA_RETRIEVAL,
+    DATA_RETRIEVAL_PL,
     DATA_RETRIEVAL_ZH,
     DATA_STS_EN,
+    DATA_STS_PL,
     DATA_STS_ZH,
     DATA_STS_OTHER,
     DATA_SUMMARIZATION,
@@ -1017,6 +1115,25 @@ with block:
                 with gr.Row():
                     data_run_overall_zh = gr.Button("Refresh")
                     data_run_overall_zh.click(get_mteb_average_zh, inputs=None, outputs=data_overall_zh)
+            with gr.TabItem("Polish"):
+                with gr.Row():
+                    gr.Markdown("""
+                    **Overall MTEB Polish leaderboard (PL-MTEB) 🔮🇵🇱**
+
+                    - **Metric:** Various, refer to task tabs
+                    - **Languages:** Polish
+                    - **Credits:** [Rafał Poświata](https://github.com/rafalposwiata), [Konrad Wojtasik](https://github.com/kwojtasi) & [BEIR-PL](https://arxiv.org/abs/2305.19840)
+                    """)
+                with gr.Row():
+                    data_overall_pl = gr.components.Dataframe(
+                        DATA_OVERALL_PL,
+                        datatype=["number", "markdown"] + ["number"] * len(DATA_OVERALL_PL.columns),
+                        type="pandas",
+                        wrap=True,
+                    )
+                with gr.Row():
+                    data_run_overall_pl = gr.Button("Refresh")
+                    data_run_overall_pl.click(get_mteb_average_pl, inputs=None, outputs=data_overall_pl)
         with gr.TabItem("Bitext Mining"):
             with gr.TabItem("English-X"):
                 with gr.Row():
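Each new Polish tab is wired the same way as the existing ones: a Dataframe seeded with the table built at startup, plus a Refresh button whose click handler recomputes it. A stripped-down, self-contained sketch of that wiring (a standalone demo, not the leaderboard's actual layout; refresh_scores is a hypothetical stand-in for get_mteb_average_pl):

import gradio as gr
import pandas as pd

def refresh_scores() -> pd.DataFrame:
    # Placeholder for get_mteb_average_pl(), which rebuilds the table from fresh results.
    return pd.DataFrame({"Model": ["model-a"], "Average (2 datasets)": [49.0]})

with gr.Blocks() as demo:
    with gr.Tabs():
        with gr.TabItem("Polish"):
            with gr.Row():
                # Seed the table once at build time, like the leaderboard does.
                table = gr.Dataframe(refresh_scores(), type="pandas", wrap=True)
            with gr.Row():
                refresh = gr.Button("Refresh")
                # Clicking re-runs the builder and replaces the table contents.
                refresh.click(refresh_scores, inputs=None, outputs=table)

if __name__ == "__main__":
    demo.launch()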
@@ -1184,7 +1301,36 @@ with block:
                             datasets_classification_nb,
                         ],
                         outputs=data_classification_nb,
-                    )
+                    )
+            with gr.TabItem("Polish"):
+                with gr.Row():
+                    gr.Markdown("""
+                    **Classification Polish Leaderboard 🤍🇵🇱**
+
+                    - **Metric:** [Accuracy](https://huggingface.co/spaces/evaluate-metric/accuracy)
+                    - **Languages:** Polish
+                    - **Credits:** [Rafał Poświata](https://github.com/rafalposwiata)
+                    """)
+                with gr.Row():
+                    data_classification_pl = gr.components.Dataframe(
+                        DATA_CLASSIFICATION_PL,
+                        datatype=["number", "markdown"] + ["number"] * len(DATA_CLASSIFICATION_PL.columns),
+                        type="pandas",
+                    )
+                with gr.Row():
+                    data_run_classification_pl = gr.Button("Refresh")
+                    task_classification_pl = gr.Variable(value=["Classification"])
+                    lang_classification_pl = gr.Variable(value=[])
+                    datasets_classification_pl = gr.Variable(value=TASK_LIST_CLASSIFICATION_PL)
+                    data_run_classification_pl.click(
+                        get_mteb_data,
+                        inputs=[
+                            task_classification_pl,
+                            lang_classification_pl,
+                            datasets_classification_pl,
+                        ],
+                        outputs=data_classification_pl,
+                    )
             with gr.TabItem("Swedish"):
                 with gr.Row():
                     gr.Markdown("""
@@ -1316,7 +1462,32 @@ with block:
                         get_mteb_data,
                         inputs=[task_clustering_de, lang_clustering_de, datasets_clustering_de],
                         outputs=data_clustering_de,
-                    )
+                    )
+            with gr.TabItem("Polish"):
+                with gr.Row():
+                    gr.Markdown("""
+                    **Clustering Polish Leaderboard ✨🇵🇱**
+
+                    - **Metric:** Validity Measure (v_measure)
+                    - **Languages:** Polish
+                    - **Credits:** [Rafał Poświata](https://github.com/rafalposwiata)
+                    """)
+                with gr.Row():
+                    data_clustering_pl = gr.components.Dataframe(
+                        DATA_CLUSTERING_PL,
+                        datatype=["number", "markdown"] + ["number"] * len(DATA_CLUSTERING_PL.columns) * 2,
+                        type="pandas",
+                    )
+                with gr.Row():
+                    data_run_clustering_pl = gr.Button("Refresh")
+                    task_clustering_pl = gr.Variable(value=["Clustering"])
+                    lang_clustering_pl = gr.Variable(value=[])
+                    datasets_clustering_pl = gr.Variable(value=TASK_LIST_CLUSTERING_PL)
+                    data_run_clustering_pl.click(
+                        get_mteb_data,
+                        inputs=[task_clustering_pl, lang_clustering_pl, datasets_clustering_pl],
+                        outputs=data_clustering_pl,
+                    )
         with gr.TabItem("Pair Classification"):
             with gr.TabItem("English"):
                 with gr.Row():
@@ -1375,6 +1546,35 @@ with block:
                         ],
                         outputs=data_pair_classification_zh,
                     )
+            with gr.TabItem("Polish"):
+                with gr.Row():
+                    gr.Markdown("""
+                    **Pair Classification Chinese Leaderboard 🎭🇵🇱**
+
+                    - **Metric:** Average Precision based on Cosine Similarities (cos_sim_ap)
+                    - **Languages:** Polish
+                    - **Credits:** [Rafał Poświata](https://github.com/rafalposwiata)
+                    """)
+                with gr.Row():
+                    data_pair_classification_pl = gr.components.Dataframe(
+                        DATA_PAIR_CLASSIFICATION_PL,
+                        datatype=["number", "markdown"] + ["number"] * len(DATA_PAIR_CLASSIFICATION_PL.columns),
+                        type="pandas",
+                    )
+                with gr.Row():
+                    data_run = gr.Button("Refresh")
+                    task_pair_classification_pl = gr.Variable(value=["PairClassification"])
+                    lang_pair_classification_pl = gr.Variable(value=[])
+                    datasets_pair_classification_pl = gr.Variable(value=TASK_LIST_PAIR_CLASSIFICATION_PL)
+                    data_run_classification_pl.click(
+                        get_mteb_data,
+                        inputs=[
+                            task_pair_classification_pl,
+                            lang_pair_classification_pl,
+                            datasets_pair_classification_pl,
+                        ],
+                        outputs=data_pair_classification_pl,
+                    )
         with gr.TabItem("Reranking"):
             with gr.TabItem("English"):
                 with gr.Row():
@@ -1561,6 +1761,31 @@ with block:
                         inputs=[task_sts_zh, lang_sts_zh, datasets_sts_zh],
                         outputs=data_sts_zh,
                     )
+            with gr.TabItem("Polish"):
+                with gr.Row():
+                    gr.Markdown("""
+                    **STS Polish Leaderboard 🤖🇵🇱**
+
+                    - **Metric:** Spearman correlation based on cosine similarity
+                    - **Languages:** Polish
+                    - **Credits:** [Rafał Poświata](https://github.com/rafalposwiata)
+                    """)
+                with gr.Row():
+                    data_sts_pl = gr.components.Dataframe(
+                        DATA_STS_PL,
+                        datatype=["number", "markdown"] + ["number"] * len(DATA_STS_PL.columns),
+                        type="pandas",
+                    )
+                with gr.Row():
+                    data_run_sts_pl = gr.Button("Refresh")
+                    task_sts_pl = gr.Variable(value=["STS"])
+                    lang_sts_pl = gr.Variable(value=[])
+                    datasets_sts_pl = gr.Variable(value=TASK_LIST_STS_PL)
+                    data_run_sts_pl.click(
+                        get_mteb_data,
+                        inputs=[task_sts_pl, lang_sts_pl, datasets_sts_pl],
+                        outputs=data_sts_pl,
+                    )
             with gr.TabItem("Other"):
                 with gr.Row():
                     gr.Markdown("""
@@ -1627,16 +1852,6 @@ with block:
     # This is optional - If deactivated the data loaded at "Build time" is shown like for Overall tab
     """
     block.load(get_mteb_data, inputs=[task_bitext_mining], outputs=data_bitext_mining)
-    block.load(get_mteb_data, inputs=[task_classification_en, lang_classification_en], outputs=data_classification_en)
-    block.load(get_mteb_data, inputs=[task_classification], outputs=data_classification)
-    block.load(get_mteb_data, inputs=[task_clustering, empty, datasets_clustering], outputs=data_clustering)
-    block.load(get_mteb_data, inputs=[task_clustering_de, empty_de, datasets_clustering_de], outputs=data_clustering_de)
-    block.load(get_mteb_data, inputs=[task_pair_classification], outputs=data_pair_classification)
-    block.load(get_mteb_data, inputs=[task_retrieval], outputs=data_retrieval)
-    block.load(get_mteb_data, inputs=[task_reranking], outputs=data_reranking)
-    block.load(get_mteb_data, inputs=[task_sts_en, lang_sts_en], outputs=data_sts_en)
-    block.load(get_mteb_data, inputs=[task_sts], outputs=data_sts)
-    block.load(get_mteb_data, inputs=[task_summarization], outputs=data_summarization)
     """
 
 block.queue(concurrency_count=40, max_size=10)