Commit 3ffdc42
1 Parent(s): 003d24d

Updates

README.md CHANGED
@@ -1,4 +1,3 @@
-
 ---
 title: leaderboard
 emoji: 🔥

app.py CHANGED
@@ -96,19 +96,6 @@ TASK_LIST_SUMMARIZATION = [
 
 TASK_LIST_EN = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS + TASK_LIST_SUMMARIZATION
 
-TASK_TO_TASK_LIST = {}
-
-
-
-def make_clickable_model(model_name):
-    # Remove user from model name
-    model_name_show = " ".join(model_name.split("/")[1:])
-    link = "https://huggingface.co/" + model_name
-    return (
-        f'<a target="_blank" style="text-decoration: underline" href="{link}">{model_name_show}</a>'
-    )
-
-
 TASK_TO_METRIC = {
     "BitextMining": "f1",
     "Clustering": "v_measure",
@@ -120,7 +107,16 @@ TASK_TO_METRIC = {
     "Summarization": "cos_sim_spearman",
 }
 
-def get_mteb_data(tasks=["Clustering"], metric="v_measure", langs=[], cast_to_str=…
+def make_clickable_model(model_name):
+    # Remove user from model name
+    model_name_show = " ".join(model_name.split("/")[1:])
+    link = "https://huggingface.co/" + model_name
+    return (
+        f'<a target="_blank" style="text-decoration: underline" href="{link}">{model_name_show}</a>'
+    )
+
+
+def get_mteb_data(tasks=["Clustering"], langs=[], cast_to_str=True, task_to_metric=TASK_TO_METRIC):
     api = HfApi()
     models = api.list_models(filter="mteb")
     df_list = []
@@ -141,9 +137,7 @@ def get_mteb_data(tasks=["Clustering"], metric="v_measure", langs=[], cast_to_st
         #        {"type": "f1", "value": 38.809586587791664},
         #    ],
         # },
-
         # Use "get" instead of dict indexing to skip incompat metadata instead of erroring out
-        #if langs is None:
         task_results = [sub_res for sub_res in meta["model-index"][0]["results"] if (sub_res.get("task", {}).get("type", "") in tasks) and (sub_res.get("dataset", {}).get("config", "default") in ("default", *langs))]
         out = [{res["dataset"]["name"].replace("MTEB ", ""): [round(score["value"], 2) for score in res["metrics"] if score["type"] == task_to_metric.get(res["task"]["type"])][0]} for res in task_results]
         #else:
@@ -170,53 +164,60 @@ def get_mteb_data(tasks=["Clustering"], metric="v_measure", langs=[], cast_to_st
     cols = sorted(list(df.columns))
     cols.insert(0, cols.pop(cols.index("Model")))
     df = df[cols]
-    # df.insert(1, "Average", df.mean(axis=1, skipna=False))
     df.fillna("", inplace=True)
     if cast_to_str:
         return df.astype(str) # Cast to str as Gradio does not accept floats
     return df
 
+def get_mteb_average(get_all_avgs=False):
+    global DATA_OVERALL, DATA_CLASSIFICATION_EN, DATA_CLUSTERING, DATA_PAIR_CLASSIFICATION, DATA_RERANKING, DATA_RETRIEVAL, DATA_STS_EN, DATA_SUMMARIZATION
+    DATA_OVERALL = get_mteb_data(
+        tasks=[
+            "Classification",
+            "Clustering",
+            "PairClassification",
+            "Reranking",
+            "Retrieval",
+            "STS",
+            "Summarization",
+        ],
+        langs=["en", "en-en"],
+        cast_to_str=False
+    )
+
+    DATA_OVERALL.insert(1, "Average", DATA_OVERALL[TASK_LIST_EN].mean(axis=1, skipna=False))
+    DATA_OVERALL.insert(2, "Classification Average", DATA_OVERALL[TASK_LIST_CLASSIFICATION].mean(axis=1, skipna=False))
+    DATA_OVERALL.insert(3, "Clustering Average", DATA_OVERALL[TASK_LIST_CLUSTERING].mean(axis=1, skipna=False))
+    DATA_OVERALL.insert(4, "Pair Classification Average", DATA_OVERALL[TASK_LIST_PAIR_CLASSIFICATION].mean(axis=1, skipna=False))
+    DATA_OVERALL.insert(5, "Reranking Average", DATA_OVERALL[TASK_LIST_RERANKING].mean(axis=1, skipna=False))
+    DATA_OVERALL.insert(6, "Retrieval Average", DATA_OVERALL[TASK_LIST_RETRIEVAL].mean(axis=1, skipna=False))
+    DATA_OVERALL.insert(7, "STS Average", DATA_OVERALL[TASK_LIST_STS].mean(axis=1, skipna=False))
+    DATA_OVERALL.insert(8, "Summarization Average", DATA_OVERALL[TASK_LIST_SUMMARIZATION].mean(axis=1, skipna=False))
+    DATA_OVERALL.sort_values("Average", ascending=False, inplace=True)
+    # Start ranking from 1
+    DATA_OVERALL.insert(0, "Rank", list(range(1, len(DATA_OVERALL) + 1)))
 
-DATA_OVERALL = get_mteb_data(
-    tasks=[
-        "Classification",
-        "Clustering",
-        "PairClassification",
-        "Reranking",
-        "Retrieval",
-        "STS",
-        "Summarization",
-    ],
-    langs=["en", "en-en"],
-    cast_to_str=False
-)
-
-DATA_OVERALL.insert(1, "Average", DATA_OVERALL[TASK_LIST_EN].mean(axis=1, skipna=False))
-DATA_OVERALL.insert(2, "Classification Average", DATA_OVERALL[TASK_LIST_CLASSIFICATION].mean(axis=1, skipna=False))
-DATA_OVERALL.insert(3, "Clustering Average", DATA_OVERALL[TASK_LIST_CLUSTERING].mean(axis=1, skipna=False))
-DATA_OVERALL.insert(4, "Pair Classification Average", DATA_OVERALL[TASK_LIST_PAIR_CLASSIFICATION].mean(axis=1, skipna=False))
-DATA_OVERALL.insert(5, "Reranking Average", DATA_OVERALL[TASK_LIST_RERANKING].mean(axis=1, skipna=False))
-DATA_OVERALL.insert(6, "Retrieval Average", DATA_OVERALL[TASK_LIST_RETRIEVAL].mean(axis=1, skipna=False))
-DATA_OVERALL.insert(7, "STS Average", DATA_OVERALL[TASK_LIST_STS].mean(axis=1, skipna=False))
-DATA_OVERALL.insert(8, "Summarization Average", DATA_OVERALL[TASK_LIST_SUMMARIZATION].mean(axis=1, skipna=False))
-DATA_OVERALL = DATA_OVERALL.round(2).astype(str)
+    DATA_OVERALL = DATA_OVERALL.round(2).astype(str)
 
-DATA_CLASSIFICATION_EN = DATA_OVERALL[["Model"] + TASK_LIST_CLASSIFICATION]
-DATA_CLUSTERING = DATA_OVERALL[["Model"] + TASK_LIST_CLUSTERING]
-DATA_PAIR_CLASSIFICATION = DATA_OVERALL[["Model"] + TASK_LIST_PAIR_CLASSIFICATION]
-DATA_RERANKING = DATA_OVERALL[["Model"] + TASK_LIST_RERANKING]
-DATA_RETRIEVAL = DATA_OVERALL[["Model"] + TASK_LIST_RETRIEVAL]
-DATA_STS_EN = DATA_OVERALL[["Model"] + TASK_LIST_STS]
-DATA_SUMMARIZATION = DATA_OVERALL[["Model"] + TASK_LIST_SUMMARIZATION]
+    DATA_CLASSIFICATION_EN = DATA_OVERALL[["Model"] + TASK_LIST_CLASSIFICATION]
+    DATA_CLUSTERING = DATA_OVERALL[["Model"] + TASK_LIST_CLUSTERING]
+    DATA_PAIR_CLASSIFICATION = DATA_OVERALL[["Model"] + TASK_LIST_PAIR_CLASSIFICATION]
+    DATA_RERANKING = DATA_OVERALL[["Model"] + TASK_LIST_RERANKING]
+    DATA_RETRIEVAL = DATA_OVERALL[["Model"] + TASK_LIST_RETRIEVAL]
+    DATA_STS_EN = DATA_OVERALL[["Model"] + TASK_LIST_STS]
+    DATA_SUMMARIZATION = DATA_OVERALL[["Model"] + TASK_LIST_SUMMARIZATION]
 
-DATA_OVERALL = DATA_OVERALL[["Model", "Average", "Classification Average", "Clustering Average", "Pair Classification Average", "Reranking Average", "Retrieval Average", "STS Average", "Summarization Average"]]
+    DATA_OVERALL = DATA_OVERALL[["Rank", "Model", "Average", "Classification Average", "Clustering Average", "Pair Classification Average", "Reranking Average", "Retrieval Average", "STS Average", "Summarization Average"]]
 
+    return DATA_OVERALL
 
+get_mteb_average()
 block = gr.Blocks()
 
+
 with block:
     gr.Markdown(
-        """Leaderboard …
+        """MTEB Leaderboard. See <a href="https://huggingface.co/Gradio-Blocks" target="_blank" style="text-decoration: underline">Blocks Party Event</a>"""
     )
     with gr.Tabs():
         with gr.TabItem("Overall"):
@@ -225,11 +226,30 @@ with block:
             with gr.Row():
                 data_overall = gr.components.Dataframe(
                     DATA_OVERALL,
-                    datatype="markdown",
+                    datatype=["markdown"] * len(DATA_OVERALL.columns) * 2,
                     type="pandas",
-                    col_count=(len(DATA_OVERALL.columns), "fixed"),
+                    #col_count=(len(DATA_OVERALL.columns), "fixed"),
                     wrap=True,
                 )
+            with gr.Row():
+                data_run = gr.Button("Refresh")
+                data_run.click(get_mteb_average, inputs=None, outputs=data_overall)
+        with gr.TabItem("BitextMining"):
+            with gr.Row():
+                gr.Markdown("""Leaderboard for Clustering""")
+            with gr.Row():
+                data_bitext_mining = gr.components.Dataframe(
+                    datatype=["markdown"] * 500, # hack when we don't know how many columns
+                    type="pandas",
+                )
+            with gr.Row():
+                data_run = gr.Button("Refresh")
+                task_bitext_mining = gr.Variable(value="BitextMining")
+                data_run.click(
+                    get_mteb_data,
+                    inputs=[task_bitext_mining],
+                    outputs=data_bitext_mining,
+                )
         with gr.TabItem("Classification"):
            with gr.TabItem("English"):
                 with gr.Row():
@@ -237,20 +257,17 @@ with block:
                 with gr.Row():
                     data_classification_en = gr.components.Dataframe(
                         DATA_CLASSIFICATION_EN,
-                        datatype="markdown",
+                        datatype=["markdown"] * len(DATA_CLASSIFICATION_EN.columns) * 20,
                         type="pandas",
-                        col_count=(len(DATA_CLASSIFICATION_EN.columns), "fixed"),
                     )
                 with gr.Row():
-                    data_run = gr.Button("Refresh")
+                    data_run_classification_en = gr.Button("Refresh")
                     task_classification_en = gr.Variable(value="Classification")
-                    metric_classification_en = gr.Variable(value="accuracy")
                     lang_classification_en = gr.Variable(value=["en"])
-                    data_run.click(
+                    data_run_classification_en.click(
                         get_mteb_data,
                         inputs=[
                             task_classification_en,
-                            metric_classification_en,
                             lang_classification_en,
                         ],
                         outputs=data_classification_en,
@@ -260,16 +277,15 @@ with block:
                     gr.Markdown("""Multilingual Classification""")
                 with gr.Row():
                     data_classification = gr.components.Dataframe(
-                        datatype=["markdown"] * 500,
+                        datatype=["markdown"] * 500, # hack when we don't know how many columns
                         type="pandas",
                     )
                 with gr.Row():
                     data_run = gr.Button("Refresh")
                     task_classification = gr.Variable(value="Classification")
-                    metric_classification = gr.Variable(value="accuracy")
                     data_run.click(
                         get_mteb_data,
-                        inputs=[task_classification, metric_classification],
+                        inputs=[task_classification],
                         outputs=data_classification,
                     )
         with gr.TabItem("Clustering"):
@@ -277,48 +293,68 @@ with block:
                 gr.Markdown("""Leaderboard for Clustering""")
             with gr.Row():
                 data_clustering = gr.components.Dataframe(
-                    …
+                    DATA_CLUSTERING,
+                    datatype="markdown",
                     type="pandas",
+                    col_count=(len(DATA_CLUSTERING.columns), "fixed"),
                 )
             with gr.Row():
                 data_run = gr.Button("Refresh")
                 task_clustering = gr.Variable(value="Clustering")
-                metric_clustering = gr.Variable(value="v_measure")
                 data_run.click(
                     get_mteb_data,
-                    inputs=[task_clustering, metric_clustering],
+                    inputs=[task_clustering],
                     outputs=data_clustering,
                 )
+        with gr.TabItem("Pair Classification"):
+            with gr.Row():
+                gr.Markdown("""Leaderboard for Pair Classification""")
+            with gr.Row():
+                data_pair_classification = gr.components.Dataframe(
+                    DATA_PAIR_CLASSIFICATION,
+                    datatype="markdown",
+                    type="pandas",
+                    col_count=(len(DATA_PAIR_CLASSIFICATION.columns), "fixed"),
+                )
+            with gr.Row():
+                data_run = gr.Button("Refresh")
+                task_pair_classification = gr.Variable(value="Clustering")
+                data_run.click(
+                    get_mteb_data,
+                    inputs=[task_pair_classification],
+                    outputs=data_pair_classification,
+                )
         with gr.TabItem("Retrieval"):
             with gr.Row():
                 gr.Markdown("""Leaderboard for Retrieval""")
             with gr.Row():
                 data_retrieval = gr.components.Dataframe(
-                    …
+                    DATA_RETRIEVAL,
+                    datatype=["markdown"] * len(DATA_RETRIEVAL.columns) * 2,
                     type="pandas",
                 )
            with gr.Row():
                 data_run = gr.Button("Refresh")
                 task_retrieval = gr.Variable(value="Retrieval")
-                metric_retrieval = gr.Variable(value="ndcg_at_10")
                 data_run.click(
-                    get_mteb_data, inputs=[task_retrieval, metric_retrieval], outputs=data_retrieval
+                    get_mteb_data, inputs=[task_retrieval], outputs=data_retrieval
                 )
         with gr.TabItem("Reranking"):
             with gr.Row():
                 gr.Markdown("""Leaderboard for Reranking""")
             with gr.Row():
                 data_reranking = gr.components.Dataframe(
-                    …
+                    DATA_RERANKING,
+                    datatype="markdown",
                     type="pandas",
-                    …
+                    col_count=(len(DATA_RERANKING.columns), "fixed"),
                 )
             with gr.Row():
                 data_run = gr.Button("Refresh")
                 task_reranking = gr.Variable(value="Reranking")
                 metric_reranking = gr.Variable(value="map")
                 data_run.click(
-                    get_mteb_data, inputs=[task_reranking, metric_reranking], outputs=data_reranking
+                    get_mteb_data, inputs=[task_reranking], outputs=data_reranking
                 )
         with gr.TabItem("STS"):
             with gr.TabItem("English"):
@@ -326,17 +362,18 @@ with block:
                     gr.Markdown("""Leaderboard for STS""")
                 with gr.Row():
                     data_sts_en = gr.components.Dataframe(
-                        …
+                        DATA_STS_EN,
+                        datatype="markdown",
                         type="pandas",
+                        col_count=(len(DATA_STS_EN.columns), "fixed"),
                     )
                 with gr.Row():
                     data_run_en = gr.Button("Refresh")
                     task_sts_en = gr.Variable(value="STS")
-                    metric_sts_en = gr.Variable(value="cos_sim_spearman")
                     lang_sts_en = gr.Variable(value=["en", "en-en"])
                     data_run.click(
                         get_mteb_data,
-                        inputs=[task_sts_en, metric_sts_en, lang_sts_en],
+                        inputs=[task_sts_en, lang_sts_en],
                         outputs=data_sts_en,
                     )
             with gr.TabItem("Multilingual"):
@@ -344,49 +381,49 @@ with block:
                     gr.Markdown("""Leaderboard for STS""")
                 with gr.Row():
                     data_sts = gr.components.Dataframe(
-                        datatype=["markdown"] * …
+                        datatype=["markdown"] * 50, # hack when we don't know how many columns
                         type="pandas",
                     )
                 with gr.Row():
                     data_run = gr.Button("Refresh")
                     task_sts = gr.Variable(value="STS")
-                    metric_sts = gr.Variable(value="cos_sim_spearman")
-                    data_run.click(get_mteb_data, inputs=[task_sts, metric_sts], outputs=data_sts)
+                    data_run.click(get_mteb_data, inputs=[task_sts], outputs=data_sts)
         with gr.TabItem("Summarization"):
             with gr.Row():
                 gr.Markdown("""Leaderboard for Summarization""")
             with gr.Row():
                 data_summarization = gr.components.Dataframe(
-                    …
+                    DATA_SUMMARIZATION,
+                    datatype="markdown",
                     type="pandas",
+                    col_count=(len(DATA_SUMMARIZATION.columns), "fixed"),
                 )
             with gr.Row():
                 data_run = gr.Button("Refresh")
                 task_summarization = gr.Variable(value="Summarization")
-                metric_summarization = gr.Variable(value="cos_sim_spearman")
                 data_run.click(
                     get_mteb_data,
-                    inputs=[task_summarization, metric_summarization],
+                    inputs=[task_summarization],
                     outputs=data_summarization,
                 )
     # running the function on page load in addition to when the button is clicked
-    …
-    …
-    …
-    …
-    …
-    …
-    block.load(
-        …
-        inputs=[task_classification, metric_classification],
-        outputs=data_classification,
-    )
-    block.load(get_mteb_data, inputs=[task_clustering, metric_clustering], outputs=data_clustering)
-    block.load(get_mteb_data, inputs=[task_retrieval, metric_retrieval], outputs=data_retrieval)
-    block.load(get_mteb_data, inputs=[task_reranking, metric_reranking], outputs=data_reranking)
-    block.load(get_mteb_data, inputs=[task_sts, metric_sts], outputs=data_sts)
-    block.load(
-        get_mteb_data, inputs=[task_summarization, metric_summarization], outputs=data_summarization
-    )
+    block.load(get_mteb_data, inputs=[task_bitext_mining], outputs=data_bitext_mining)
+    block.load(get_mteb_data, inputs=[task_classification_en, lang_classification_en], outputs=data_classification_en)
+    block.load(get_mteb_data, inputs=[task_classification], outputs=data_classification)
+    block.load(get_mteb_data, inputs=[task_clustering], outputs=data_clustering)
+    block.load(get_mteb_data, inputs=[task_retrieval], outputs=data_retrieval)
+    block.load(get_mteb_data, inputs=[task_reranking], outputs=data_reranking)
+    block.load(get_mteb_data, inputs=[task_sts], outputs=data_sts)
+    block.load(get_mteb_data, inputs=[task_summarization], outputs=data_summarization)
 
 block.launch()
+
+
+# Possible changes:
+# Could check if tasks are valid (Currently users could just invent new tasks - similar for languages)
+# Could make it load in the background without the Gradio logo closer to the Deep RL space
+# Could add graphs / other visual content
+
+# Sources:
+# https://huggingface.co/spaces/gradio/leaderboard
+# https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard
 
			

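For context on the central change in this commit (dropping the metric argument from get_mteb_data and resolving the metric per task through TASK_TO_METRIC), here is a minimal, self-contained sketch. It is not part of the commit: the Hub querying, dataframe assembly, and Gradio wiring are omitted, and example_result is a made-up record shaped like one entry of meta["model-index"][0]["results"].

# Hypothetical, trimmed illustration of the per-task metric lookup used in get_mteb_data above.
TASK_TO_METRIC = {
    "Clustering": "v_measure",
    "STS": "cos_sim_spearman",
}

# Made-up example of a single model-index result entry.
example_result = {
    "task": {"type": "Clustering"},
    "dataset": {"name": "MTEB ArxivClusteringP2P"},
    "metrics": [{"type": "v_measure", "value": 38.809586587791664}],
}

def score_for(result, task_to_metric=TASK_TO_METRIC):
    # Keep only the metric that belongs to this result's task type and round it,
    # mirroring the list comprehension inside get_mteb_data.
    wanted = task_to_metric.get(result["task"]["type"])
    return [round(m["value"], 2) for m in result["metrics"] if m["type"] == wanted][0]

print(score_for(example_result))  # 38.81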