hsila committed on
Commit
a102fa2
·
verified ·
1 Parent(s): e88eaac

Upload model

Browse files
Files changed (3) hide show
  1. README.md +60 -168
  2. config.json +9 -8
  3. model.safetensors +1 -1
README.md CHANGED
@@ -7,14 +7,18 @@ tags:
7
  - mteb
8
  - transformers
9
  - transformers.js
 
 
 
 
10
  model-index:
11
  - name: epoch_0_model
12
  results:
13
  - task:
14
  type: Classification
15
  dataset:
16
- type: mteb/amazon_counterfactual
17
  name: MTEB AmazonCounterfactualClassification (en)
 
18
  config: en
19
  split: test
20
  revision: e8379541af4e31359cca9fbcf4b00f2671dba205
@@ -28,8 +32,8 @@ model-index:
28
  - task:
29
  type: Classification
30
  dataset:
31
- type: mteb/amazon_polarity
32
  name: MTEB AmazonPolarityClassification
 
33
  config: default
34
  split: test
35
  revision: e2d317d38cd51312af73b3d32a06d1a08b442046
@@ -43,8 +47,8 @@ model-index:
43
  - task:
44
  type: Classification
45
  dataset:
46
- type: mteb/amazon_reviews_multi
47
  name: MTEB AmazonReviewsClassification (en)
 
48
  config: en
49
  split: test
50
  revision: 1399c76144fd37290681b995c656ef9b2e06e26d
@@ -56,8 +60,8 @@ model-index:
56
  - task:
57
  type: Retrieval
58
  dataset:
59
- type: arguana
60
  name: MTEB ArguAna
 
61
  config: default
62
  split: test
63
  revision: None
@@ -125,8 +129,8 @@ model-index:
125
  - task:
126
  type: Clustering
127
  dataset:
128
- type: mteb/arxiv-clustering-p2p
129
  name: MTEB ArxivClusteringP2P
 
130
  config: default
131
  split: test
132
  revision: a122ad7f3f0291bf49cc6f4d32aa80929df69d5d
@@ -136,8 +140,8 @@ model-index:
136
  - task:
137
  type: Clustering
138
  dataset:
139
- type: mteb/arxiv-clustering-s2s
140
  name: MTEB ArxivClusteringS2S
 
141
  config: default
142
  split: test
143
  revision: f910caf1a6075f7329cdf8c1a6135696f37dbd53
@@ -147,8 +151,8 @@ model-index:
147
  - task:
148
  type: Reranking
149
  dataset:
150
- type: mteb/askubuntudupquestions-reranking
151
  name: MTEB AskUbuntuDupQuestions
 
152
  config: default
153
  split: test
154
  revision: 2000358ca161889fa9c082cb41daa8dcfb161a54
@@ -160,8 +164,8 @@ model-index:
160
  - task:
161
  type: STS
162
  dataset:
163
- type: mteb/biosses-sts
164
  name: MTEB BIOSSES
 
165
  config: default
166
  split: test
167
  revision: d3fb88f8f02e40887cd149695127462bbcf29b4a
@@ -181,8 +185,8 @@ model-index:
181
  - task:
182
  type: Classification
183
  dataset:
184
- type: mteb/banking77
185
  name: MTEB Banking77Classification
 
186
  config: default
187
  split: test
188
  revision: 0fd18e25b25c072e09e0d92ab615fda904d66300
@@ -194,8 +198,8 @@ model-index:
194
  - task:
195
  type: Clustering
196
  dataset:
197
- type: mteb/biorxiv-clustering-p2p
198
  name: MTEB BiorxivClusteringP2P
 
199
  config: default
200
  split: test
201
  revision: 65b79d1d13f80053f67aca9498d9402c2d9f1f40
@@ -205,8 +209,8 @@ model-index:
205
  - task:
206
  type: Clustering
207
  dataset:
208
- type: mteb/biorxiv-clustering-s2s
209
  name: MTEB BiorxivClusteringS2S
 
210
  config: default
211
  split: test
212
  revision: 258694dd0231531bc1fd9de6ceb52a0853c6d908
@@ -216,8 +220,8 @@ model-index:
216
  - task:
217
  type: Retrieval
218
  dataset:
219
- type: BeIR/cqadupstack
220
  name: MTEB CQADupstackAndroidRetrieval
 
221
  config: default
222
  split: test
223
  revision: None
@@ -282,15 +286,6 @@ model-index:
282
  value: 41.754999999999995
283
  - type: recall_at_5
284
  value: 48.296
285
- - task:
286
- type: Retrieval
287
- dataset:
288
- type: BeIR/cqadupstack
289
- name: MTEB CQADupstackEnglishRetrieval
290
- config: default
291
- split: test
292
- revision: None
293
- metrics:
294
  - type: map_at_1
295
  value: 30.262
296
  - type: map_at_10
@@ -351,15 +346,6 @@ model-index:
351
  value: 43.129
352
  - type: recall_at_5
353
  value: 48.336
354
- - task:
355
- type: Retrieval
356
- dataset:
357
- type: BeIR/cqadupstack
358
- name: MTEB CQADupstackGamingRetrieval
359
- config: default
360
- split: test
361
- revision: None
362
- metrics:
363
  - type: map_at_1
364
  value: 39.951
365
  - type: map_at_10
@@ -420,15 +406,6 @@ model-index:
420
  value: 56.032000000000004
421
  - type: recall_at_5
422
  value: 61.629999999999995
423
- - task:
424
- type: Retrieval
425
- dataset:
426
- type: BeIR/cqadupstack
427
- name: MTEB CQADupstackGisRetrieval
428
- config: default
429
- split: test
430
- revision: None
431
- metrics:
432
  - type: map_at_1
433
  value: 25.566
434
  - type: map_at_10
@@ -489,15 +466,6 @@ model-index:
489
  value: 37.43
490
  - type: recall_at_5
491
  value: 41.894999999999996
492
- - task:
493
- type: Retrieval
494
- dataset:
495
- type: BeIR/cqadupstack
496
- name: MTEB CQADupstackMathematicaRetrieval
497
- config: default
498
- split: test
499
- revision: None
500
- metrics:
501
  - type: map_at_1
502
  value: 16.663
503
  - type: map_at_10
@@ -558,15 +526,6 @@ model-index:
558
  value: 25.907999999999998
559
  - type: recall_at_5
560
  value: 31.214
561
- - task:
562
- type: Retrieval
563
- dataset:
564
- type: BeIR/cqadupstack
565
- name: MTEB CQADupstackPhysicsRetrieval
566
- config: default
567
- split: test
568
- revision: None
569
- metrics:
570
  - type: map_at_1
571
  value: 27.695999999999998
572
  - type: map_at_10
@@ -627,15 +586,6 @@ model-index:
627
  value: 41.13
628
  - type: recall_at_5
629
  value: 46.872
630
- - task:
631
- type: Retrieval
632
- dataset:
633
- type: BeIR/cqadupstack
634
- name: MTEB CQADupstackProgrammersRetrieval
635
- config: default
636
- split: test
637
- revision: None
638
- metrics:
639
  - type: map_at_1
640
  value: 24.108
641
  - type: map_at_10
@@ -696,15 +646,6 @@ model-index:
696
  value: 37.662
697
  - type: recall_at_5
698
  value: 42.565
699
- - task:
700
- type: Retrieval
701
- dataset:
702
- type: BeIR/cqadupstack
703
- name: MTEB CQADupstackRetrieval
704
- config: default
705
- split: test
706
- revision: None
707
- metrics:
708
  - type: map_at_1
709
  value: 25.00791666666667
710
  - type: map_at_10
@@ -765,15 +706,6 @@ model-index:
765
  value: 36.660916666666665
766
  - type: recall_at_5
767
  value: 41.94149999999999
768
- - task:
769
- type: Retrieval
770
- dataset:
771
- type: BeIR/cqadupstack
772
- name: MTEB CQADupstackStatsRetrieval
773
- config: default
774
- split: test
775
- revision: None
776
- metrics:
777
  - type: map_at_1
778
  value: 23.521
779
  - type: map_at_10
@@ -834,15 +766,6 @@ model-index:
834
  value: 32.614
835
  - type: recall_at_5
836
  value: 37.15
837
- - task:
838
- type: Retrieval
839
- dataset:
840
- type: BeIR/cqadupstack
841
- name: MTEB CQADupstackTexRetrieval
842
- config: default
843
- split: test
844
- revision: None
845
- metrics:
846
  - type: map_at_1
847
  value: 16.236
848
  - type: map_at_10
@@ -903,15 +826,6 @@ model-index:
903
  value: 26.179999999999996
904
  - type: recall_at_5
905
  value: 30.712
906
- - task:
907
- type: Retrieval
908
- dataset:
909
- type: BeIR/cqadupstack
910
- name: MTEB CQADupstackUnixRetrieval
911
- config: default
912
- split: test
913
- revision: None
914
- metrics:
915
  - type: map_at_1
916
  value: 24.11
917
  - type: map_at_10
@@ -972,15 +886,6 @@ model-index:
972
  value: 34.724
973
  - type: recall_at_5
974
  value: 39.925
975
- - task:
976
- type: Retrieval
977
- dataset:
978
- type: BeIR/cqadupstack
979
- name: MTEB CQADupstackWebmastersRetrieval
980
- config: default
981
- split: test
982
- revision: None
983
- metrics:
984
  - type: map_at_1
985
  value: 22.091
986
  - type: map_at_10
@@ -1041,15 +946,6 @@ model-index:
1041
  value: 33.158
1042
  - type: recall_at_5
1043
  value: 39.086999999999996
1044
- - task:
1045
- type: Retrieval
1046
- dataset:
1047
- type: BeIR/cqadupstack
1048
- name: MTEB CQADupstackWordpressRetrieval
1049
- config: default
1050
- split: test
1051
- revision: None
1052
- metrics:
1053
  - type: map_at_1
1054
  value: 19.883
1055
  - type: map_at_10
@@ -1113,8 +1009,8 @@ model-index:
1113
  - task:
1114
  type: Retrieval
1115
  dataset:
1116
- type: climate-fever
1117
  name: MTEB ClimateFEVER
 
1118
  config: default
1119
  split: test
1120
  revision: None
@@ -1182,8 +1078,8 @@ model-index:
1182
  - task:
1183
  type: Retrieval
1184
  dataset:
1185
- type: dbpedia-entity
1186
  name: MTEB DBPedia
 
1187
  config: default
1188
  split: test
1189
  revision: None
@@ -1251,8 +1147,8 @@ model-index:
1251
  - task:
1252
  type: Classification
1253
  dataset:
1254
- type: mteb/emotion
1255
  name: MTEB EmotionClassification
 
1256
  config: default
1257
  split: test
1258
  revision: 4f58c6b202a23cf9a4da393831edf4f9183cad37
@@ -1264,8 +1160,8 @@ model-index:
1264
  - task:
1265
  type: Retrieval
1266
  dataset:
1267
- type: fever
1268
  name: MTEB FEVER
 
1269
  config: default
1270
  split: test
1271
  revision: None
@@ -1333,8 +1229,8 @@ model-index:
1333
  - task:
1334
  type: Retrieval
1335
  dataset:
1336
- type: fiqa
1337
  name: MTEB FiQA2018
 
1338
  config: default
1339
  split: test
1340
  revision: None
@@ -1402,8 +1298,8 @@ model-index:
1402
  - task:
1403
  type: Retrieval
1404
  dataset:
1405
- type: hotpotqa
1406
  name: MTEB HotpotQA
 
1407
  config: default
1408
  split: test
1409
  revision: None
@@ -1471,8 +1367,8 @@ model-index:
1471
  - task:
1472
  type: Classification
1473
  dataset:
1474
- type: mteb/imdb
1475
  name: MTEB ImdbClassification
 
1476
  config: default
1477
  split: test
1478
  revision: 3d86128a09e091d6018b6d26cad27f2739fc2db7
@@ -1486,8 +1382,8 @@ model-index:
1486
  - task:
1487
  type: Retrieval
1488
  dataset:
1489
- type: msmarco
1490
  name: MTEB MSMARCO
 
1491
  config: default
1492
  split: dev
1493
  revision: None
@@ -1555,8 +1451,8 @@ model-index:
1555
  - task:
1556
  type: Classification
1557
  dataset:
1558
- type: mteb/mtop_domain
1559
  name: MTEB MTOPDomainClassification (en)
 
1560
  config: en
1561
  split: test
1562
  revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf
@@ -1568,8 +1464,8 @@ model-index:
1568
  - task:
1569
  type: Classification
1570
  dataset:
1571
- type: mteb/mtop_intent
1572
  name: MTEB MTOPIntentClassification (en)
 
1573
  config: en
1574
  split: test
1575
  revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
@@ -1581,8 +1477,8 @@ model-index:
1581
  - task:
1582
  type: Classification
1583
  dataset:
1584
- type: mteb/amazon_massive_intent
1585
  name: MTEB MassiveIntentClassification (en)
 
1586
  config: en
1587
  split: test
1588
  revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
@@ -1594,8 +1490,8 @@ model-index:
1594
  - task:
1595
  type: Classification
1596
  dataset:
1597
- type: mteb/amazon_massive_scenario
1598
  name: MTEB MassiveScenarioClassification (en)
 
1599
  config: en
1600
  split: test
1601
  revision: 7d571f92784cd94a019292a1f45445077d0ef634
@@ -1607,8 +1503,8 @@ model-index:
1607
  - task:
1608
  type: Clustering
1609
  dataset:
1610
- type: mteb/medrxiv-clustering-p2p
1611
  name: MTEB MedrxivClusteringP2P
 
1612
  config: default
1613
  split: test
1614
  revision: e7a26af6f3ae46b30dde8737f02c07b1505bcc73
@@ -1618,8 +1514,8 @@ model-index:
1618
  - task:
1619
  type: Clustering
1620
  dataset:
1621
- type: mteb/medrxiv-clustering-s2s
1622
  name: MTEB MedrxivClusteringS2S
 
1623
  config: default
1624
  split: test
1625
  revision: 35191c8c0dca72d8ff3efcd72aa802307d469663
@@ -1629,8 +1525,8 @@ model-index:
1629
  - task:
1630
  type: Reranking
1631
  dataset:
1632
- type: mteb/mind_small
1633
  name: MTEB MindSmallReranking
 
1634
  config: default
1635
  split: test
1636
  revision: 3bdac13927fdc888b903db93b2ffdbd90b295a69
@@ -1642,8 +1538,8 @@ model-index:
1642
  - task:
1643
  type: Retrieval
1644
  dataset:
1645
- type: nfcorpus
1646
  name: MTEB NFCorpus
 
1647
  config: default
1648
  split: test
1649
  revision: None
@@ -1711,8 +1607,8 @@ model-index:
1711
  - task:
1712
  type: Retrieval
1713
  dataset:
1714
- type: nq
1715
  name: MTEB NQ
 
1716
  config: default
1717
  split: test
1718
  revision: None
@@ -1780,8 +1676,8 @@ model-index:
1780
  - task:
1781
  type: Retrieval
1782
  dataset:
1783
- type: quora
1784
  name: MTEB QuoraRetrieval
 
1785
  config: default
1786
  split: test
1787
  revision: None
@@ -1849,8 +1745,8 @@ model-index:
1849
  - task:
1850
  type: Clustering
1851
  dataset:
1852
- type: mteb/reddit-clustering
1853
  name: MTEB RedditClustering
 
1854
  config: default
1855
  split: test
1856
  revision: 24640382cdbf8abc73003fb0fa6d111a705499eb
@@ -1860,8 +1756,8 @@ model-index:
1860
  - task:
1861
  type: Clustering
1862
  dataset:
1863
- type: mteb/reddit-clustering-p2p
1864
  name: MTEB RedditClusteringP2P
 
1865
  config: default
1866
  split: test
1867
  revision: 282350215ef01743dc01b456c7f5241fa8937f16
@@ -1871,8 +1767,8 @@ model-index:
1871
  - task:
1872
  type: Retrieval
1873
  dataset:
1874
- type: scidocs
1875
  name: MTEB SCIDOCS
 
1876
  config: default
1877
  split: test
1878
  revision: None
@@ -1940,8 +1836,8 @@ model-index:
1940
  - task:
1941
  type: STS
1942
  dataset:
1943
- type: mteb/sickr-sts
1944
  name: MTEB SICK-R
 
1945
  config: default
1946
  split: test
1947
  revision: a6ea5a8cab320b040a23452cc28066d9beae2cee
@@ -1961,8 +1857,8 @@ model-index:
1961
  - task:
1962
  type: STS
1963
  dataset:
1964
- type: mteb/sts12-sts
1965
  name: MTEB STS12
 
1966
  config: default
1967
  split: test
1968
  revision: a0d554a64d88156834ff5ae9920b964011b16384
@@ -1982,8 +1878,8 @@ model-index:
1982
  - task:
1983
  type: STS
1984
  dataset:
1985
- type: mteb/sts13-sts
1986
  name: MTEB STS13
 
1987
  config: default
1988
  split: test
1989
  revision: 7e90230a92c190f1bf69ae9002b8cea547a64cca
@@ -2003,8 +1899,8 @@ model-index:
2003
  - task:
2004
  type: STS
2005
  dataset:
2006
- type: mteb/sts14-sts
2007
  name: MTEB STS14
 
2008
  config: default
2009
  split: test
2010
  revision: 6031580fec1f6af667f0bd2da0a551cf4f0b2375
@@ -2024,8 +1920,8 @@ model-index:
2024
  - task:
2025
  type: STS
2026
  dataset:
2027
- type: mteb/sts15-sts
2028
  name: MTEB STS15
 
2029
  config: default
2030
  split: test
2031
  revision: ae752c7c21bf194d8b67fd573edf7ae58183cbe3
@@ -2045,8 +1941,8 @@ model-index:
2045
  - task:
2046
  type: STS
2047
  dataset:
2048
- type: mteb/sts16-sts
2049
  name: MTEB STS16
 
2050
  config: default
2051
  split: test
2052
  revision: 4d8694f8f0e0100860b497b999b3dbed754a0513
@@ -2066,8 +1962,8 @@ model-index:
2066
  - task:
2067
  type: STS
2068
  dataset:
2069
- type: mteb/sts17-crosslingual-sts
2070
  name: MTEB STS17 (en-en)
 
2071
  config: en-en
2072
  split: test
2073
  revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d
@@ -2087,8 +1983,8 @@ model-index:
2087
  - task:
2088
  type: STS
2089
  dataset:
2090
- type: mteb/sts22-crosslingual-sts
2091
  name: MTEB STS22 (en)
 
2092
  config: en
2093
  split: test
2094
  revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80
@@ -2108,8 +2004,8 @@ model-index:
2108
  - task:
2109
  type: STS
2110
  dataset:
2111
- type: mteb/stsbenchmark-sts
2112
  name: MTEB STSBenchmark
 
2113
  config: default
2114
  split: test
2115
  revision: b0fddb56ed78048fa8b90373c8a3cfc37b684831
@@ -2129,8 +2025,8 @@ model-index:
2129
  - task:
2130
  type: Reranking
2131
  dataset:
2132
- type: mteb/scidocs-reranking
2133
  name: MTEB SciDocsRR
 
2134
  config: default
2135
  split: test
2136
  revision: d3c5e1fc0b855ab6097bf1cda04dd73947d7caab
@@ -2142,8 +2038,8 @@ model-index:
2142
  - task:
2143
  type: Retrieval
2144
  dataset:
2145
- type: scifact
2146
  name: MTEB SciFact
 
2147
  config: default
2148
  split: test
2149
  revision: None
@@ -2211,8 +2107,8 @@ model-index:
2211
  - task:
2212
  type: PairClassification
2213
  dataset:
2214
- type: mteb/sprintduplicatequestions-pairclassification
2215
  name: MTEB SprintDuplicateQuestions
 
2216
  config: default
2217
  split: test
2218
  revision: d66bd1f72af766a5cc4b0ca5e00c162f89e8cc46
@@ -2266,8 +2162,8 @@ model-index:
2266
  - task:
2267
  type: Clustering
2268
  dataset:
2269
- type: mteb/stackexchange-clustering
2270
  name: MTEB StackExchangeClustering
 
2271
  config: default
2272
  split: test
2273
  revision: 6cbc1f7b2bc0622f2e39d2c77fa502909748c259
@@ -2277,8 +2173,8 @@ model-index:
2277
  - task:
2278
  type: Clustering
2279
  dataset:
2280
- type: mteb/stackexchange-clustering-p2p
2281
  name: MTEB StackExchangeClusteringP2P
 
2282
  config: default
2283
  split: test
2284
  revision: 815ca46b2622cec33ccafc3735d572c266efdb44
@@ -2288,8 +2184,8 @@ model-index:
2288
  - task:
2289
  type: Reranking
2290
  dataset:
2291
- type: mteb/stackoverflowdupquestions-reranking
2292
  name: MTEB StackOverflowDupQuestions
 
2293
  config: default
2294
  split: test
2295
  revision: e185fbe320c72810689fc5848eb6114e1ef5ec69
@@ -2301,8 +2197,8 @@ model-index:
2301
  - task:
2302
  type: Summarization
2303
  dataset:
2304
- type: mteb/summeval
2305
  name: MTEB SummEval
 
2306
  config: default
2307
  split: test
2308
  revision: cda12ad7615edc362dbf25a00fdd61d3b1eaf93c
@@ -2318,8 +2214,8 @@ model-index:
2318
  - task:
2319
  type: Retrieval
2320
  dataset:
2321
- type: trec-covid
2322
  name: MTEB TRECCOVID
 
2323
  config: default
2324
  split: test
2325
  revision: None
@@ -2387,8 +2283,8 @@ model-index:
2387
  - task:
2388
  type: Retrieval
2389
  dataset:
2390
- type: webis-touche2020
2391
  name: MTEB Touche2020
 
2392
  config: default
2393
  split: test
2394
  revision: None
@@ -2456,8 +2352,8 @@ model-index:
2456
  - task:
2457
  type: Classification
2458
  dataset:
2459
- type: mteb/toxic_conversations_50k
2460
  name: MTEB ToxicConversationsClassification
 
2461
  config: default
2462
  split: test
2463
  revision: d7c0de2777da35d6aae2200a62c6e0e5af397c4c
@@ -2471,8 +2367,8 @@ model-index:
2471
  - task:
2472
  type: Classification
2473
  dataset:
2474
- type: mteb/tweet_sentiment_extraction
2475
  name: MTEB TweetSentimentExtractionClassification
 
2476
  config: default
2477
  split: test
2478
  revision: d604517c81ca91fe16a244d1248fc021f9ecee7a
@@ -2484,8 +2380,8 @@ model-index:
2484
  - task:
2485
  type: Clustering
2486
  dataset:
2487
- type: mteb/twentynewsgroups-clustering
2488
  name: MTEB TwentyNewsgroupsClustering
 
2489
  config: default
2490
  split: test
2491
  revision: 6125ec4e24fa026cec8a478383ee943acfbd5449
@@ -2495,8 +2391,8 @@ model-index:
2495
  - task:
2496
  type: PairClassification
2497
  dataset:
2498
- type: mteb/twittersemeval2015-pairclassification
2499
  name: MTEB TwitterSemEval2015
 
2500
  config: default
2501
  split: test
2502
  revision: 70970daeab8776df92f5ea462b6173c0b46fd2d1
@@ -2550,8 +2446,8 @@ model-index:
2550
  - task:
2551
  type: PairClassification
2552
  dataset:
2553
- type: mteb/twitterurlcorpus-pairclassification
2554
  name: MTEB TwitterURLCorpus
 
2555
  config: default
2556
  split: test
2557
  revision: 8b6510b0b1fa4e4c4f879467980e9be563ec1cdf
@@ -2602,10 +2498,6 @@ model-index:
2602
  value: 86.71257651501476
2603
  - type: max_f1
2604
  value: 79.13867741453949
2605
- license: apache-2.0
2606
- language:
2607
- - en
2608
- new_version: nomic-ai/nomic-embed-text-v1.5
2609
  ---
2610
 
2611
 
 
7
  - mteb
8
  - transformers
9
  - transformers.js
10
+ license: apache-2.0
11
+ language:
12
+ - en
13
+ new_version: nomic-ai/nomic-embed-text-v1.5
14
  model-index:
15
  - name: epoch_0_model
16
  results:
17
  - task:
18
  type: Classification
19
  dataset:
 
20
  name: MTEB AmazonCounterfactualClassification (en)
21
+ type: mteb/amazon_counterfactual
22
  config: en
23
  split: test
24
  revision: e8379541af4e31359cca9fbcf4b00f2671dba205
 
32
  - task:
33
  type: Classification
34
  dataset:
 
35
  name: MTEB AmazonPolarityClassification
36
+ type: mteb/amazon_polarity
37
  config: default
38
  split: test
39
  revision: e2d317d38cd51312af73b3d32a06d1a08b442046
 
47
  - task:
48
  type: Classification
49
  dataset:
 
50
  name: MTEB AmazonReviewsClassification (en)
51
+ type: mteb/amazon_reviews_multi
52
  config: en
53
  split: test
54
  revision: 1399c76144fd37290681b995c656ef9b2e06e26d
 
60
  - task:
61
  type: Retrieval
62
  dataset:
 
63
  name: MTEB ArguAna
64
+ type: arguana
65
  config: default
66
  split: test
67
  revision: None
 
129
  - task:
130
  type: Clustering
131
  dataset:
 
132
  name: MTEB ArxivClusteringP2P
133
+ type: mteb/arxiv-clustering-p2p
134
  config: default
135
  split: test
136
  revision: a122ad7f3f0291bf49cc6f4d32aa80929df69d5d
 
140
  - task:
141
  type: Clustering
142
  dataset:
 
143
  name: MTEB ArxivClusteringS2S
144
+ type: mteb/arxiv-clustering-s2s
145
  config: default
146
  split: test
147
  revision: f910caf1a6075f7329cdf8c1a6135696f37dbd53
 
151
  - task:
152
  type: Reranking
153
  dataset:
 
154
  name: MTEB AskUbuntuDupQuestions
155
+ type: mteb/askubuntudupquestions-reranking
156
  config: default
157
  split: test
158
  revision: 2000358ca161889fa9c082cb41daa8dcfb161a54
 
164
  - task:
165
  type: STS
166
  dataset:
 
167
  name: MTEB BIOSSES
168
+ type: mteb/biosses-sts
169
  config: default
170
  split: test
171
  revision: d3fb88f8f02e40887cd149695127462bbcf29b4a
 
185
  - task:
186
  type: Classification
187
  dataset:
 
188
  name: MTEB Banking77Classification
189
+ type: mteb/banking77
190
  config: default
191
  split: test
192
  revision: 0fd18e25b25c072e09e0d92ab615fda904d66300
 
198
  - task:
199
  type: Clustering
200
  dataset:
 
201
  name: MTEB BiorxivClusteringP2P
202
+ type: mteb/biorxiv-clustering-p2p
203
  config: default
204
  split: test
205
  revision: 65b79d1d13f80053f67aca9498d9402c2d9f1f40
 
209
  - task:
210
  type: Clustering
211
  dataset:
 
212
  name: MTEB BiorxivClusteringS2S
213
+ type: mteb/biorxiv-clustering-s2s
214
  config: default
215
  split: test
216
  revision: 258694dd0231531bc1fd9de6ceb52a0853c6d908
 
220
  - task:
221
  type: Retrieval
222
  dataset:
 
223
  name: MTEB CQADupstackAndroidRetrieval
224
+ type: BeIR/cqadupstack
225
  config: default
226
  split: test
227
  revision: None
 
286
  value: 41.754999999999995
287
  - type: recall_at_5
288
  value: 48.296
 
 
 
 
 
 
 
 
 
289
  - type: map_at_1
290
  value: 30.262
291
  - type: map_at_10
 
346
  value: 43.129
347
  - type: recall_at_5
348
  value: 48.336
 
 
 
 
 
 
 
 
 
349
  - type: map_at_1
350
  value: 39.951
351
  - type: map_at_10
 
406
  value: 56.032000000000004
407
  - type: recall_at_5
408
  value: 61.629999999999995
 
 
 
 
 
 
 
 
 
409
  - type: map_at_1
410
  value: 25.566
411
  - type: map_at_10
 
466
  value: 37.43
467
  - type: recall_at_5
468
  value: 41.894999999999996
 
 
 
 
 
 
 
 
 
469
  - type: map_at_1
470
  value: 16.663
471
  - type: map_at_10
 
526
  value: 25.907999999999998
527
  - type: recall_at_5
528
  value: 31.214
 
 
 
 
 
 
 
 
 
529
  - type: map_at_1
530
  value: 27.695999999999998
531
  - type: map_at_10
 
586
  value: 41.13
587
  - type: recall_at_5
588
  value: 46.872
 
 
 
 
 
 
 
 
 
589
  - type: map_at_1
590
  value: 24.108
591
  - type: map_at_10
 
646
  value: 37.662
647
  - type: recall_at_5
648
  value: 42.565
 
 
 
 
 
 
 
 
 
649
  - type: map_at_1
650
  value: 25.00791666666667
651
  - type: map_at_10
 
706
  value: 36.660916666666665
707
  - type: recall_at_5
708
  value: 41.94149999999999
 
 
 
 
 
 
 
 
 
709
  - type: map_at_1
710
  value: 23.521
711
  - type: map_at_10
 
766
  value: 32.614
767
  - type: recall_at_5
768
  value: 37.15
 
 
 
 
 
 
 
 
 
769
  - type: map_at_1
770
  value: 16.236
771
  - type: map_at_10
 
826
  value: 26.179999999999996
827
  - type: recall_at_5
828
  value: 30.712
 
 
 
 
 
 
 
 
 
829
  - type: map_at_1
830
  value: 24.11
831
  - type: map_at_10
 
886
  value: 34.724
887
  - type: recall_at_5
888
  value: 39.925
 
 
 
 
 
 
 
 
 
889
  - type: map_at_1
890
  value: 22.091
891
  - type: map_at_10
 
946
  value: 33.158
947
  - type: recall_at_5
948
  value: 39.086999999999996
 
 
 
 
 
 
 
 
 
949
  - type: map_at_1
950
  value: 19.883
951
  - type: map_at_10
 
1009
  - task:
1010
  type: Retrieval
1011
  dataset:
 
1012
  name: MTEB ClimateFEVER
1013
+ type: climate-fever
1014
  config: default
1015
  split: test
1016
  revision: None
 
1078
  - task:
1079
  type: Retrieval
1080
  dataset:
 
1081
  name: MTEB DBPedia
1082
+ type: dbpedia-entity
1083
  config: default
1084
  split: test
1085
  revision: None
 
1147
  - task:
1148
  type: Classification
1149
  dataset:
 
1150
  name: MTEB EmotionClassification
1151
+ type: mteb/emotion
1152
  config: default
1153
  split: test
1154
  revision: 4f58c6b202a23cf9a4da393831edf4f9183cad37
 
1160
  - task:
1161
  type: Retrieval
1162
  dataset:
 
1163
  name: MTEB FEVER
1164
+ type: fever
1165
  config: default
1166
  split: test
1167
  revision: None
 
1229
  - task:
1230
  type: Retrieval
1231
  dataset:
 
1232
  name: MTEB FiQA2018
1233
+ type: fiqa
1234
  config: default
1235
  split: test
1236
  revision: None
 
1298
  - task:
1299
  type: Retrieval
1300
  dataset:
 
1301
  name: MTEB HotpotQA
1302
+ type: hotpotqa
1303
  config: default
1304
  split: test
1305
  revision: None
 
1367
  - task:
1368
  type: Classification
1369
  dataset:
 
1370
  name: MTEB ImdbClassification
1371
+ type: mteb/imdb
1372
  config: default
1373
  split: test
1374
  revision: 3d86128a09e091d6018b6d26cad27f2739fc2db7
 
1382
  - task:
1383
  type: Retrieval
1384
  dataset:
 
1385
  name: MTEB MSMARCO
1386
+ type: msmarco
1387
  config: default
1388
  split: dev
1389
  revision: None
 
1451
  - task:
1452
  type: Classification
1453
  dataset:
 
1454
  name: MTEB MTOPDomainClassification (en)
1455
+ type: mteb/mtop_domain
1456
  config: en
1457
  split: test
1458
  revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf
 
1464
  - task:
1465
  type: Classification
1466
  dataset:
 
1467
  name: MTEB MTOPIntentClassification (en)
1468
+ type: mteb/mtop_intent
1469
  config: en
1470
  split: test
1471
  revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
 
1477
  - task:
1478
  type: Classification
1479
  dataset:
 
1480
  name: MTEB MassiveIntentClassification (en)
1481
+ type: mteb/amazon_massive_intent
1482
  config: en
1483
  split: test
1484
  revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
 
1490
  - task:
1491
  type: Classification
1492
  dataset:
 
1493
  name: MTEB MassiveScenarioClassification (en)
1494
+ type: mteb/amazon_massive_scenario
1495
  config: en
1496
  split: test
1497
  revision: 7d571f92784cd94a019292a1f45445077d0ef634
 
1503
  - task:
1504
  type: Clustering
1505
  dataset:
 
1506
  name: MTEB MedrxivClusteringP2P
1507
+ type: mteb/medrxiv-clustering-p2p
1508
  config: default
1509
  split: test
1510
  revision: e7a26af6f3ae46b30dde8737f02c07b1505bcc73
 
1514
  - task:
1515
  type: Clustering
1516
  dataset:
 
1517
  name: MTEB MedrxivClusteringS2S
1518
+ type: mteb/medrxiv-clustering-s2s
1519
  config: default
1520
  split: test
1521
  revision: 35191c8c0dca72d8ff3efcd72aa802307d469663
 
1525
  - task:
1526
  type: Reranking
1527
  dataset:
 
1528
  name: MTEB MindSmallReranking
1529
+ type: mteb/mind_small
1530
  config: default
1531
  split: test
1532
  revision: 3bdac13927fdc888b903db93b2ffdbd90b295a69
 
1538
  - task:
1539
  type: Retrieval
1540
  dataset:
 
1541
  name: MTEB NFCorpus
1542
+ type: nfcorpus
1543
  config: default
1544
  split: test
1545
  revision: None
 
1607
  - task:
1608
  type: Retrieval
1609
  dataset:
 
1610
  name: MTEB NQ
1611
+ type: nq
1612
  config: default
1613
  split: test
1614
  revision: None
 
1676
  - task:
1677
  type: Retrieval
1678
  dataset:
 
1679
  name: MTEB QuoraRetrieval
1680
+ type: quora
1681
  config: default
1682
  split: test
1683
  revision: None
 
1745
  - task:
1746
  type: Clustering
1747
  dataset:
 
1748
  name: MTEB RedditClustering
1749
+ type: mteb/reddit-clustering
1750
  config: default
1751
  split: test
1752
  revision: 24640382cdbf8abc73003fb0fa6d111a705499eb
 
1756
  - task:
1757
  type: Clustering
1758
  dataset:
 
1759
  name: MTEB RedditClusteringP2P
1760
+ type: mteb/reddit-clustering-p2p
1761
  config: default
1762
  split: test
1763
  revision: 282350215ef01743dc01b456c7f5241fa8937f16
 
1767
  - task:
1768
  type: Retrieval
1769
  dataset:
 
1770
  name: MTEB SCIDOCS
1771
+ type: scidocs
1772
  config: default
1773
  split: test
1774
  revision: None
 
1836
  - task:
1837
  type: STS
1838
  dataset:
 
1839
  name: MTEB SICK-R
1840
+ type: mteb/sickr-sts
1841
  config: default
1842
  split: test
1843
  revision: a6ea5a8cab320b040a23452cc28066d9beae2cee
 
1857
  - task:
1858
  type: STS
1859
  dataset:
 
1860
  name: MTEB STS12
1861
+ type: mteb/sts12-sts
1862
  config: default
1863
  split: test
1864
  revision: a0d554a64d88156834ff5ae9920b964011b16384
 
1878
  - task:
1879
  type: STS
1880
  dataset:
 
1881
  name: MTEB STS13
1882
+ type: mteb/sts13-sts
1883
  config: default
1884
  split: test
1885
  revision: 7e90230a92c190f1bf69ae9002b8cea547a64cca
 
1899
  - task:
1900
  type: STS
1901
  dataset:
 
1902
  name: MTEB STS14
1903
+ type: mteb/sts14-sts
1904
  config: default
1905
  split: test
1906
  revision: 6031580fec1f6af667f0bd2da0a551cf4f0b2375
 
1920
  - task:
1921
  type: STS
1922
  dataset:
 
1923
  name: MTEB STS15
1924
+ type: mteb/sts15-sts
1925
  config: default
1926
  split: test
1927
  revision: ae752c7c21bf194d8b67fd573edf7ae58183cbe3
 
1941
  - task:
1942
  type: STS
1943
  dataset:
 
1944
  name: MTEB STS16
1945
+ type: mteb/sts16-sts
1946
  config: default
1947
  split: test
1948
  revision: 4d8694f8f0e0100860b497b999b3dbed754a0513
 
1962
  - task:
1963
  type: STS
1964
  dataset:
 
1965
  name: MTEB STS17 (en-en)
1966
+ type: mteb/sts17-crosslingual-sts
1967
  config: en-en
1968
  split: test
1969
  revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d
 
1983
  - task:
1984
  type: STS
1985
  dataset:
 
1986
  name: MTEB STS22 (en)
1987
+ type: mteb/sts22-crosslingual-sts
1988
  config: en
1989
  split: test
1990
  revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80
 
2004
  - task:
2005
  type: STS
2006
  dataset:
 
2007
  name: MTEB STSBenchmark
2008
+ type: mteb/stsbenchmark-sts
2009
  config: default
2010
  split: test
2011
  revision: b0fddb56ed78048fa8b90373c8a3cfc37b684831
 
2025
  - task:
2026
  type: Reranking
2027
  dataset:
 
2028
  name: MTEB SciDocsRR
2029
+ type: mteb/scidocs-reranking
2030
  config: default
2031
  split: test
2032
  revision: d3c5e1fc0b855ab6097bf1cda04dd73947d7caab
 
2038
  - task:
2039
  type: Retrieval
2040
  dataset:
 
2041
  name: MTEB SciFact
2042
+ type: scifact
2043
  config: default
2044
  split: test
2045
  revision: None
 
2107
  - task:
2108
  type: PairClassification
2109
  dataset:
 
2110
  name: MTEB SprintDuplicateQuestions
2111
+ type: mteb/sprintduplicatequestions-pairclassification
2112
  config: default
2113
  split: test
2114
  revision: d66bd1f72af766a5cc4b0ca5e00c162f89e8cc46
 
2162
  - task:
2163
  type: Clustering
2164
  dataset:
 
2165
  name: MTEB StackExchangeClustering
2166
+ type: mteb/stackexchange-clustering
2167
  config: default
2168
  split: test
2169
  revision: 6cbc1f7b2bc0622f2e39d2c77fa502909748c259
 
2173
  - task:
2174
  type: Clustering
2175
  dataset:
 
2176
  name: MTEB StackExchangeClusteringP2P
2177
+ type: mteb/stackexchange-clustering-p2p
2178
  config: default
2179
  split: test
2180
  revision: 815ca46b2622cec33ccafc3735d572c266efdb44
 
2184
  - task:
2185
  type: Reranking
2186
  dataset:
 
2187
  name: MTEB StackOverflowDupQuestions
2188
+ type: mteb/stackoverflowdupquestions-reranking
2189
  config: default
2190
  split: test
2191
  revision: e185fbe320c72810689fc5848eb6114e1ef5ec69
 
2197
  - task:
2198
  type: Summarization
2199
  dataset:
 
2200
  name: MTEB SummEval
2201
+ type: mteb/summeval
2202
  config: default
2203
  split: test
2204
  revision: cda12ad7615edc362dbf25a00fdd61d3b1eaf93c
 
2214
  - task:
2215
  type: Retrieval
2216
  dataset:
 
2217
  name: MTEB TRECCOVID
2218
+ type: trec-covid
2219
  config: default
2220
  split: test
2221
  revision: None
 
2283
  - task:
2284
  type: Retrieval
2285
  dataset:
 
2286
  name: MTEB Touche2020
2287
+ type: webis-touche2020
2288
  config: default
2289
  split: test
2290
  revision: None
 
2352
  - task:
2353
  type: Classification
2354
  dataset:
 
2355
  name: MTEB ToxicConversationsClassification
2356
+ type: mteb/toxic_conversations_50k
2357
  config: default
2358
  split: test
2359
  revision: d7c0de2777da35d6aae2200a62c6e0e5af397c4c
 
2367
  - task:
2368
  type: Classification
2369
  dataset:
 
2370
  name: MTEB TweetSentimentExtractionClassification
2371
+ type: mteb/tweet_sentiment_extraction
2372
  config: default
2373
  split: test
2374
  revision: d604517c81ca91fe16a244d1248fc021f9ecee7a
 
2380
  - task:
2381
  type: Clustering
2382
  dataset:
 
2383
  name: MTEB TwentyNewsgroupsClustering
2384
+ type: mteb/twentynewsgroups-clustering
2385
  config: default
2386
  split: test
2387
  revision: 6125ec4e24fa026cec8a478383ee943acfbd5449
 
2391
  - task:
2392
  type: PairClassification
2393
  dataset:
 
2394
  name: MTEB TwitterSemEval2015
2395
+ type: mteb/twittersemeval2015-pairclassification
2396
  config: default
2397
  split: test
2398
  revision: 70970daeab8776df92f5ea462b6173c0b46fd2d1
 
2446
  - task:
2447
  type: PairClassification
2448
  dataset:
 
2449
  name: MTEB TwitterURLCorpus
2450
+ type: mteb/twitterurlcorpus-pairclassification
2451
  config: default
2452
  split: test
2453
  revision: 8b6510b0b1fa4e4c4f879467980e9be563ec1cdf
 
2498
  value: 86.71257651501476
2499
  - type: max_f1
2500
  value: 79.13867741453949
 
 
 
 
2501
  ---
2502
 
2503
 
config.json CHANGED
@@ -4,20 +4,21 @@
4
  "NomicBertModel"
5
  ],
6
  "attn_pdrop": 0.0,
7
- "auto_map": {
8
- "AutoConfig": "BASF-AI/nomic-bert-2048--configuration_hf_nomic_bert.NomicBertConfig",
9
- "AutoModel": "BASF-AI/nomic-bert-2048--modeling_hf_nomic_bert.NomicBertModel",
10
- "AutoModelForMaskedLM": "BASF-AI/nomic-bert-2048--modeling_hf_nomic_bert.NomicBertForPreTraining"
11
  },
12
  "bos_token_id": null,
13
  "causal": false,
14
  "dense_seq_output": true,
15
- "embd_pdrop": 0.0,
16
  "eos_token_id": null,
17
  "fused_bias_fc": true,
18
  "fused_dropout_add_ln": true,
19
  "initializer_range": 0.02,
20
  "layer_norm_epsilon": 1e-12,
 
21
  "mlp_fc1_bias": false,
22
  "mlp_fc2_bias": false,
23
  "model_type": "nomic_bert",
@@ -32,12 +33,12 @@
32
  "prenorm": false,
33
  "qkv_proj_bias": false,
34
  "reorder_and_upcast_attn": false,
35
- "resid_pdrop": 0.0,
36
  "rotary_emb_base": 1000,
37
  "rotary_emb_fraction": 1.0,
38
  "rotary_emb_interleaved": false,
39
  "rotary_emb_scale_base": null,
40
- "rotary_scaling_factor": 2,
41
  "scale_attn_by_inverse_layer_idx": false,
42
  "scale_attn_weights": true,
43
  "summary_activation": null,
@@ -46,7 +47,7 @@
46
  "summary_type": "cls_index",
47
  "summary_use_proj": true,
48
  "torch_dtype": "float32",
49
- "transformers_version": "4.34.0",
50
  "type_vocab_size": 2,
51
  "use_cache": true,
52
  "use_flash_attn": true,
 
4
  "NomicBertModel"
5
  ],
6
  "attn_pdrop": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "configuration_hf_nomic_bert.NomicBertConfig",
9
+ "AutoModel": "modeling_hf_nomic_bert.NomicBertModel",
10
+ "AutoModelForMaskedLM": "modeling_hf_nomic_bert.NomicBertForPreTraining"
11
  },
12
  "bos_token_id": null,
13
  "causal": false,
14
  "dense_seq_output": true,
15
+ "embd_pdrop": 0.1,
16
  "eos_token_id": null,
17
  "fused_bias_fc": true,
18
  "fused_dropout_add_ln": true,
19
  "initializer_range": 0.02,
20
  "layer_norm_epsilon": 1e-12,
21
+ "max_trained_positions": 2048,
22
  "mlp_fc1_bias": false,
23
  "mlp_fc2_bias": false,
24
  "model_type": "nomic_bert",
 
33
  "prenorm": false,
34
  "qkv_proj_bias": false,
35
  "reorder_and_upcast_attn": false,
36
+ "resid_pdrop": 0.1,
37
  "rotary_emb_base": 1000,
38
  "rotary_emb_fraction": 1.0,
39
  "rotary_emb_interleaved": false,
40
  "rotary_emb_scale_base": null,
41
+ "rotary_scaling_factor": null,
42
  "scale_attn_by_inverse_layer_idx": false,
43
  "scale_attn_weights": true,
44
  "summary_activation": null,
 
47
  "summary_type": "cls_index",
48
  "summary_use_proj": true,
49
  "torch_dtype": "float32",
50
+ "transformers_version": "4.50.3",
51
  "type_vocab_size": 2,
52
  "use_cache": true,
53
  "use_flash_attn": true,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47e396424a085a613034450cd4bf9e8acfb568b19089ae1c5c4e7051ae286877
3
  size 546938168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cdcf04b88cd3bd1228e0f8932c4e4c76af3a8ccefc46c0112092181cf70d387
3
  size 546938168