Spaces:
Runtime error
Runtime error
Commit
·
216d974
1
Parent(s):
64dd40c
Add more OpenAI models
Browse files
app.py
CHANGED
|
@@ -158,15 +158,23 @@ EXTERNAL_MODELS = [
|
|
| 158 |
"sentence-t5-xxl",
|
| 159 |
"sup-simcse-bert-base-uncased",
|
| 160 |
"text-similarity-ada-001",
|
| 161 |
-
"text-
|
| 162 |
-
"text-search-ada-
|
|
|
|
|
|
|
|
|
|
| 163 |
"unsup-simcse-bert-base-uncased",
|
| 164 |
]
|
| 165 |
EXTERNAL_MODEL_TO_LINK = {
|
| 166 |
"LASER2": "https://github.com/facebookresearch/LASER",
|
| 167 |
"text-similarity-ada-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
|
| 168 |
-
"text-
|
| 169 |
"text-search-ada-doc-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
"LaBSE": "https://huggingface.co/sentence-transformers/LaBSE",
|
| 171 |
"sentence-t5-xxl": "https://huggingface.co/sentence-transformers/sentence-t5-xxl",
|
| 172 |
"sentence-t5-xl": "https://huggingface.co/sentence-transformers/sentence-t5-xl",
|
|
@@ -219,8 +227,15 @@ EXTERNAL_MODEL_TO_DIM = {
|
|
| 219 |
"sentence-t5-xxl": 768,
|
| 220 |
"sup-simcse-bert-base-uncased": 768,
|
| 221 |
"text-similarity-ada-001": 1024,
|
|
|
|
|
|
|
|
|
|
| 222 |
"text-search-ada-query-001": 1024,
|
| 223 |
-
"text-search-ada-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
"unsup-simcse-bert-base-uncased": 768,
|
| 225 |
}
|
| 226 |
|
|
@@ -255,7 +270,7 @@ def add_task(examples):
|
|
| 255 |
return examples
|
| 256 |
|
| 257 |
for model in EXTERNAL_MODELS:
|
| 258 |
-
ds = load_dataset("mteb/results", model)
|
| 259 |
# For local debugging:
|
| 260 |
#, download_mode='force_redownload', ignore_verifications=True)
|
| 261 |
ds = ds.map(add_lang)
|
|
@@ -297,7 +312,8 @@ def get_mteb_data(tasks=["Clustering"], langs=[], fillna=True, add_emb_dim=False
|
|
| 297 |
res = {k: v for d in results_list for k, v in d.items()}
|
| 298 |
# Model & at least one result
|
| 299 |
if len(res) > 1:
|
| 300 |
-
|
|
|
|
| 301 |
df_list.append(res)
|
| 302 |
|
| 303 |
for model in models:
|
|
|
|
| 158 |
"sentence-t5-xxl",
|
| 159 |
"sup-simcse-bert-base-uncased",
|
| 160 |
"text-similarity-ada-001",
|
| 161 |
+
"text-similarity-curie-001",
|
| 162 |
+
"text-search-ada-001",
|
| 163 |
+
"text-search-babbage-001",
|
| 164 |
+
"text-search-curie-001",
|
| 165 |
+
"text-search-davinci-001",
|
| 166 |
"unsup-simcse-bert-base-uncased",
|
| 167 |
]
|
| 168 |
EXTERNAL_MODEL_TO_LINK = {
|
| 169 |
"LASER2": "https://github.com/facebookresearch/LASER",
|
| 170 |
"text-similarity-ada-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
|
| 171 |
+
"text-similarity-curie-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
|
| 172 |
"text-search-ada-doc-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
|
| 173 |
+
"text-search-ada-query-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
|
| 174 |
+
"text-search-ada-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
|
| 175 |
+
"text-search-curie-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
|
| 176 |
+
"text-search-babbage-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
|
| 177 |
+
"text-search-davinci-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
|
| 178 |
"LaBSE": "https://huggingface.co/sentence-transformers/LaBSE",
|
| 179 |
"sentence-t5-xxl": "https://huggingface.co/sentence-transformers/sentence-t5-xxl",
|
| 180 |
"sentence-t5-xl": "https://huggingface.co/sentence-transformers/sentence-t5-xl",
|
|
|
|
| 227 |
"sentence-t5-xxl": 768,
|
| 228 |
"sup-simcse-bert-base-uncased": 768,
|
| 229 |
"text-similarity-ada-001": 1024,
|
| 230 |
+
"text-similarity-curie-001": 4096,
|
| 231 |
+
|
| 232 |
+
"text-search-ada-doc-001": 1024,
|
| 233 |
"text-search-ada-query-001": 1024,
|
| 234 |
+
"text-search-ada-001": 1024,
|
| 235 |
+
"text-search-babbage-001": 2048,
|
| 236 |
+
"text-search-curie-001": 4096,
|
| 237 |
+
"text-search-davinci-001": 12288,
|
| 238 |
+
|
| 239 |
"unsup-simcse-bert-base-uncased": 768,
|
| 240 |
}
|
| 241 |
|
|
|
|
| 270 |
return examples
|
| 271 |
|
| 272 |
for model in EXTERNAL_MODELS:
|
| 273 |
+
ds = load_dataset("mteb/results", model, download_mode='force_redownload', ignore_verifications=True)
|
| 274 |
# For local debugging:
|
| 275 |
#, download_mode='force_redownload', ignore_verifications=True)
|
| 276 |
ds = ds.map(add_lang)
|
|
|
|
| 312 |
res = {k: v for d in results_list for k, v in d.items()}
|
| 313 |
# Model & at least one result
|
| 314 |
if len(res) > 1:
|
| 315 |
+
if add_emb_dim:
|
| 316 |
+
res["Embedding Dimensions"] = EXTERNAL_MODEL_TO_DIM.get(model, "")
|
| 317 |
df_list.append(res)
|
| 318 |
|
| 319 |
for model in models:
|