Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	Commit 
							
							·
						
						216d974
	
1
								Parent(s):
							
							64dd40c
								
Add more OpenAI models
Browse files
    	
        app.py
    CHANGED
    
    | @@ -158,15 +158,23 @@ EXTERNAL_MODELS = [ | |
| 158 | 
             
                "sentence-t5-xxl",
         | 
| 159 | 
             
                "sup-simcse-bert-base-uncased",
         | 
| 160 | 
             
                "text-similarity-ada-001",
         | 
| 161 | 
            -
                "text- | 
| 162 | 
            -
                "text-search-ada- | 
|  | |
|  | |
|  | |
| 163 | 
             
                "unsup-simcse-bert-base-uncased",
         | 
| 164 | 
             
            ]
         | 
| 165 | 
             
            EXTERNAL_MODEL_TO_LINK = {
         | 
| 166 | 
             
                "LASER2": "https://github.com/facebookresearch/LASER",
         | 
| 167 | 
             
                "text-similarity-ada-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
         | 
| 168 | 
            -
                "text- | 
| 169 | 
             
                "text-search-ada-doc-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 170 | 
             
                "LaBSE": "https://huggingface.co/sentence-transformers/LaBSE",
         | 
| 171 | 
             
                "sentence-t5-xxl": "https://huggingface.co/sentence-transformers/sentence-t5-xxl",
         | 
| 172 | 
             
                "sentence-t5-xl": "https://huggingface.co/sentence-transformers/sentence-t5-xl",
         | 
| @@ -219,8 +227,15 @@ EXTERNAL_MODEL_TO_DIM = { | |
| 219 | 
             
                "sentence-t5-xxl": 768,
         | 
| 220 | 
             
                "sup-simcse-bert-base-uncased": 768,
         | 
| 221 | 
             
                "text-similarity-ada-001": 1024,
         | 
|  | |
|  | |
|  | |
| 222 | 
             
                "text-search-ada-query-001": 1024,
         | 
| 223 | 
            -
                "text-search-ada- | 
|  | |
|  | |
|  | |
|  | |
| 224 | 
             
                "unsup-simcse-bert-base-uncased": 768,
         | 
| 225 | 
             
            }
         | 
| 226 |  | 
| @@ -255,7 +270,7 @@ def add_task(examples): | |
| 255 | 
             
                return examples
         | 
| 256 |  | 
| 257 | 
             
            for model in EXTERNAL_MODELS:
         | 
| 258 | 
            -
                ds = load_dataset("mteb/results", model)
         | 
| 259 | 
             
                # For local debugging:
         | 
| 260 | 
             
                #, download_mode='force_redownload', ignore_verifications=True)
         | 
| 261 | 
             
                ds = ds.map(add_lang)
         | 
| @@ -297,7 +312,8 @@ def get_mteb_data(tasks=["Clustering"], langs=[], fillna=True, add_emb_dim=False | |
| 297 | 
             
                        res = {k: v for d in results_list for k, v in d.items()}
         | 
| 298 | 
             
                    # Model & at least one result
         | 
| 299 | 
             
                    if len(res) > 1:
         | 
| 300 | 
            -
                         | 
|  | |
| 301 | 
             
                        df_list.append(res)
         | 
| 302 |  | 
| 303 | 
             
                for model in models:
         | 
|  | |
| 158 | 
             
                "sentence-t5-xxl",
         | 
| 159 | 
             
                "sup-simcse-bert-base-uncased",
         | 
| 160 | 
             
                "text-similarity-ada-001",
         | 
| 161 | 
            +
                "text-similarity-curie-001",    
         | 
| 162 | 
            +
                "text-search-ada-001",
         | 
| 163 | 
            +
                "text-search-babbage-001",
         | 
| 164 | 
            +
                "text-search-curie-001",
         | 
| 165 | 
            +
                "text-search-davinci-001",
         | 
| 166 | 
             
                "unsup-simcse-bert-base-uncased",
         | 
| 167 | 
             
            ]
         | 
| 168 | 
             
            EXTERNAL_MODEL_TO_LINK = {
         | 
| 169 | 
             
                "LASER2": "https://github.com/facebookresearch/LASER",
         | 
| 170 | 
             
                "text-similarity-ada-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
         | 
| 171 | 
            +
                "text-similarity-curie-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",    
         | 
| 172 | 
             
                "text-search-ada-doc-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
         | 
| 173 | 
            +
                "text-search-ada-query-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
         | 
| 174 | 
            +
                "text-search-ada-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
         | 
| 175 | 
            +
                "text-search-curie-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
         | 
| 176 | 
            +
                "text-search-babbage-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
         | 
| 177 | 
            +
                "text-search-davinci-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
         | 
| 178 | 
             
                "LaBSE": "https://huggingface.co/sentence-transformers/LaBSE",
         | 
| 179 | 
             
                "sentence-t5-xxl": "https://huggingface.co/sentence-transformers/sentence-t5-xxl",
         | 
| 180 | 
             
                "sentence-t5-xl": "https://huggingface.co/sentence-transformers/sentence-t5-xl",
         | 
|  | |
| 227 | 
             
                "sentence-t5-xxl": 768,
         | 
| 228 | 
             
                "sup-simcse-bert-base-uncased": 768,
         | 
| 229 | 
             
                "text-similarity-ada-001": 1024,
         | 
| 230 | 
            +
                "text-similarity-curie-001": 4096,
         | 
| 231 | 
            +
             | 
| 232 | 
            +
                "text-search-ada-doc-001": 1024,
         | 
| 233 | 
             
                "text-search-ada-query-001": 1024,
         | 
| 234 | 
            +
                "text-search-ada-001": 1024,   
         | 
| 235 | 
            +
                "text-search-babbage-001": 2048,     
         | 
| 236 | 
            +
                "text-search-curie-001": 4096,
         | 
| 237 | 
            +
                "text-search-davinci-001": 12288,   
         | 
| 238 | 
            +
             | 
| 239 | 
             
                "unsup-simcse-bert-base-uncased": 768,
         | 
| 240 | 
             
            }
         | 
| 241 |  | 
|  | |
| 270 | 
             
                return examples
         | 
| 271 |  | 
| 272 | 
             
            for model in EXTERNAL_MODELS:
         | 
| 273 | 
            +
                ds = load_dataset("mteb/results", model, download_mode='force_redownload', ignore_verifications=True)
         | 
| 274 | 
             
                # For local debugging:
         | 
| 275 | 
             
                #, download_mode='force_redownload', ignore_verifications=True)
         | 
| 276 | 
             
                ds = ds.map(add_lang)
         | 
|  | |
| 312 | 
             
                        res = {k: v for d in results_list for k, v in d.items()}
         | 
| 313 | 
             
                    # Model & at least one result
         | 
| 314 | 
             
                    if len(res) > 1:
         | 
| 315 | 
            +
                        if add_emb_dim:
         | 
| 316 | 
            +
                            res["Embedding Dimensions"] = EXTERNAL_MODEL_TO_DIM.get(model, "")
         | 
| 317 | 
             
                        df_list.append(res)
         | 
| 318 |  | 
| 319 | 
             
                for model in models:
         | 
 
			

