Upload folder using huggingface_hub
Browse files
- 1_Pooling/config.json +10 -0
- README.md +486 -0
- config.json +31 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +58 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 768,
|
3 |
+
"pooling_mode_cls_token": true,
|
4 |
+
"pooling_mode_mean_tokens": false,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,486 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
tags:
|
3 |
+
- sentence-transformers
|
4 |
+
- sentence-similarity
|
5 |
+
- feature-extraction
|
6 |
+
- generated_from_trainer
|
7 |
+
- dataset_size:32400
|
8 |
+
- loss:MultipleNegativesRankingLoss
|
9 |
+
base_model: BAAI/bge-base-en-v1.5
|
10 |
+
widget:
|
11 |
+
- source_sentence: Looking for films with Tom Hanks and Audrey Tautou in the main
|
12 |
+
cast and centered around holy grail
|
13 |
+
sentences:
|
14 |
+
- 'Das Boot is a 1981 drama, history, war movie directed by Wolfgang Petersen. Starring
|
15 |
+
Jürgen Prochnow, Herbert Grönemeyer, and Klaus Wennemann, the story explores themes
|
16 |
+
like based on novel or book, submarine, war correspondent, atlantic ocean, gibraltar,
|
17 |
+
world war ii, duty, suicide mission, drinking, sailor, convoy, destroyer, naval
|
18 |
+
warfare, naval battle, battle of the atlantic, german u-boat fleet, and confined
|
19 |
+
spaces. A German submarine hunts allied ships during the Second World War, but
|
20 |
+
it soon becomes the hunted. The crew tries to survive below the surface, while
|
21 |
+
stretching both the boat and themselves to their limits. Tagline: On land they
|
22 |
+
dreamed of being heroes. Beneath the sea they pray to be survivors.'
|
23 |
+
- 'The Da Vinci Code is a 2006 thriller, mystery movie directed by Ron Howard. Starring
|
24 |
+
Tom Hanks, Audrey Tautou, and Ian McKellen, the story explores themes like paris,
|
25 |
+
france, based on novel or book, holy grail, christianity, monk, secret society,
|
26 |
+
louvre museum, heresy, mona lisa (la gioconda), freemason, pentagram, conspiracy,
|
27 |
+
tomb, catholicism, cryptologist, iconography, albino, and sect. A murder in Paris’
|
28 |
+
Louvre Museum and cryptic clues in some of Leonardo da Vinci’s most famous paintings
|
29 |
+
lead to the discovery of a religious mystery. For 2,000 years a secret society
|
30 |
+
closely guards information that — should it come to light — could rock the very
|
31 |
+
foundations of Christianity. Tagline: Seek the truth.'
|
32 |
+
- 'This Boy''s Life is a 1993 drama movie directed by Michael Caton-Jones. Starring
|
33 |
+
Robert De Niro, Ellen Barkin, and Leonardo DiCaprio, the story explores themes
|
34 |
+
like jealousy, based on novel or book, single parent, seattle, washington, car
|
35 |
+
mechanic, relationship problems, based on true story, stepfather, family relationships,
|
36 |
+
coming of age, alcoholic, oppression, and 1950s. When a son and mother move to
|
37 |
+
Seattle in hopes for a better life, the mother meets a seemingly polite man. Things
|
38 |
+
go south when the man turns out to be abusive, endangering their lives. As the
|
39 |
+
mother struggles to maintain hope in an impossible situation, the son has plans
|
40 |
+
to escape. Tagline: He looked like the ideal husband. He seemed like the perfect
|
41 |
+
father. That''s just what they needed. But that''s not what they got.'
|
42 |
+
- source_sentence: Movies that explore the boundaries between life and death with
|
43 |
+
a touch of romance
|
44 |
+
sentences:
|
45 |
+
- 'City of Angels is a 1998 romance, drama, fantasy movie directed by Brad Silberling.
|
46 |
+
Starring Nicolas Cage, Meg Ryan, and Andre Braugher, the story explores themes
|
47 |
+
like suicide, operation, life and death, afterlife, angel, faith, heaven, remake,
|
48 |
+
los angeles, california, and interspecies romance. When a guardian angel – who
|
49 |
+
invisibly watches over the citizens of Los Angeles – becomes captivated by a strong-willed
|
50 |
+
heart surgeon, he ponders trading in his pure, otherworldly existence for a mortal
|
51 |
+
life with his beloved. The couple embarks on a tender but forbidden romance spanning
|
52 |
+
heaven and Earth. Tagline: She didn''t believe in angels until she fell in love
|
53 |
+
with one.'
|
54 |
+
- 'Spirited Away is a 2001 animation, family, fantasy movie directed by Hayao Miyazaki.
|
55 |
+
Starring Rumi Hiiragi, Miyu Irino, and Mari Natsuki, the story explores themes
|
56 |
+
like witch, parent child relationship, darkness, bath house, magic, spirit, parallel
|
57 |
+
world, amusement park, youkai, japanese mythology, and anime. A young girl, Chihiro,
|
58 |
+
becomes trapped in a strange new world of spirits. When her parents undergo a
|
59 |
+
mysterious transformation, she must call upon the courage she never knew she had
|
60 |
+
to free her family. Tagline: On the other side of the tunnel was a mysterious
|
61 |
+
town.'
|
62 |
+
- 'Apartment 7A is a 2024 horror, thriller movie directed by Natalie Erika James.
|
63 |
+
Starring Julia Garner, Dianne Wiest, and Jim Sturgess, the story explores themes
|
64 |
+
like new york city, satanism, prequel, occult, satanic cult, manhattan, new york
|
65 |
+
city, 1960s, apartment, and pretentious. A struggling young dancer finds herself
|
66 |
+
drawn in by dark forces when a peculiar, well-connected older couple promise her
|
67 |
+
a shot at fame. Tagline: Rosemary was not the first.'
|
68 |
+
- source_sentence: Movies about family dynamics and overcoming tragedy
|
69 |
+
sentences:
|
70 |
+
- 'Waves is a 2019 romance, drama movie directed by Trey Edward Shults. Starring
|
71 |
+
Kelvin Harrison, Jr., Taylor Russell, and Renée Elise Goldsberry, the story explores
|
72 |
+
themes like high school, florida, regret, forgiveness, wrestling, loss, coming
|
73 |
+
of age, grief, tragedy, interracial relationship, break-up, dying father, healing
|
74 |
+
process, family dynamics, generation z, downward spiral, father son relationship,
|
75 |
+
father daughter relationship, brother sister relationship, toxic masculinity,
|
76 |
+
african american, teen pregnancy, stepmother stepdaughter relationship, high school
|
77 |
+
athlete, stepmother stepson relationship, and diverging narrative. A controlling
|
78 |
+
father’s attempts to ensure that his two children succeed in high school backfire
|
79 |
+
after his son experiences a career-ending sports injury. Their familial bonds
|
80 |
+
are eventually placed under severe strain by an unexpected tragedy. Tagline: Love
|
81 |
+
is patient.'
|
82 |
+
- 'Barbie Fairytopia: Mermaidia is a 2006 animation, family, adventure, fantasy
|
83 |
+
movie directed by William Lau. Starring Kelly Sheridan, Lee Tockar, and Christopher
|
84 |
+
Gaze, the story explores themes like fairy, mermaid, and based on toy. In this
|
85 |
+
animated follow-up to Fairytopia, Elina enlists the help of a mermaid, Nori, to
|
86 |
+
save her friend Nalu, a merman prince who has been captured by the wicked Laverna.
|
87 |
+
Tagline: Journey under the sea in a magical adventure.'
|
88 |
+
- 'Outland is a 1981 science fiction, action, thriller movie directed by Peter Hyams.
|
89 |
+
Starring Sean Connery, Peter Boyle, and Frances Sternhagen, the story explores
|
90 |
+
themes like husband wife relationship, jupiter, wife, marshal, mining, space western,
|
91 |
+
and space centre. An honest marshal in a corrupt mining colony on Io, Jupiter''s
|
92 |
+
sunless third moon, is determined to confront a violent drug ring even though
|
93 |
+
it may cost him his life. After his wife angrily deserts him, he waits alone for
|
94 |
+
the arrival of killers hired by the company to eliminate him. Tagline: On Jupiter''s
|
95 |
+
moon he''s the only law.'
|
96 |
+
- source_sentence: 'Movies with heartwarming magical adventures and strong friendships
|
97 |
+
like Cardcaptor Sakura: The Sealed Card.'
|
98 |
+
sentences:
|
99 |
+
- 'Cardcaptor Sakura: The Sealed Card is a 2000 animation, adventure, fantasy, romance
|
100 |
+
movie directed by Morio Asaka. Starring Sakura Tange, Motoko Kumai, and Aya Hisakawa,
|
101 |
+
the story explores themes like japan, magic, elementary school, romance, clamp,
|
102 |
+
shoujo, sixth grader, card, video tape, anime, and magical girl. All of the Clow
|
103 |
+
Cards have been captured, and Sakura Kinomoto, the new Master of the Cards, is
|
104 |
+
preparing to play the lead in the play for the town festival. However, a new evil
|
105 |
+
force is causing mysterious events all over Tomoeda, including the disappearance
|
106 |
+
of Sakura''s cards. With Syaoran''s help, Sakura must figure out the cause of
|
107 |
+
these events, and save her town.'
|
108 |
+
- 'The Princess Diaries is a 2001 comedy, family, romance movie directed by Garry
|
109 |
+
Marshall. Starring Anne Hathaway, Julie Andrews, and Heather Matarazzo, the story
|
110 |
+
explores themes like princess, high school, based on novel or book, grandparent
|
111 |
+
grandchild relationship, heir to the throne, royalty, social outcast, fictitious
|
112 |
+
country, and based on young adult novel. A socially awkward but very bright 15-year-old
|
113 |
+
girl being raised by a single mom discovers that she is the princess of a small
|
114 |
+
European country because of the recent death of her long-absent father, who, unknown
|
115 |
+
to her, was the crown prince of Genovia. She must make a choice between continuing
|
116 |
+
the life of a San Francisco teen or stepping up to the throne. Tagline: She rocks.
|
117 |
+
She rules. She reigns.'
|
118 |
+
- 'My Own Private Idaho is a 1991 drama movie directed by Gus Van Sant. Starring
|
119 |
+
River Phoenix, Keanu Reeves, and James Russo, the story explores themes like individual,
|
120 |
+
friendship, treasure, robbery, sibling relationship, rome, italy, parent child
|
121 |
+
relationship, generations conflict, portland, oregon, hustler, cocaine, idaho,
|
122 |
+
seattle, washington, male friendship, road trip, unrequited love, male prostitution,
|
123 |
+
poverty, prostitution, incest, lgbt, lost mother, narcolepsy, father son relationship,
|
124 |
+
and gay theme. In this loose adaptation of Shakespeare''s "Henry IV," Mike Waters
|
125 |
+
is a hustler afflicted with narcolepsy. Scott Favor is the rebellious son of a
|
126 |
+
mayor. Together, the two travel from Portland, Oregon to Idaho and finally to
|
127 |
+
the coast of Italy in a quest to find Mike''s estranged mother. Along the way
|
128 |
+
they turn tricks for money and drugs, eventually attracting the attention of a
|
129 |
+
wealthy benefactor and sexual deviant. Tagline: Wherever, whatever, have a nice
|
130 |
+
day.'
|
131 |
+
- source_sentence: Looking for films with Emily Browning and Abbie Cornish in the
|
132 |
+
main cast and centered around asylum
|
133 |
+
sentences:
|
134 |
+
- 'Sucker Punch is a 2011 action, fantasy, thriller movie directed by Zack Snyder.
|
135 |
+
Starring Emily Browning, Abbie Cornish, and Jena Malone, the story explores themes
|
136 |
+
like samurai, escape, asylum, brothel, sword fight, dragon, robot, inmate, lobotomy,
|
137 |
+
war zone, abusive stepfather, rape culture, and alternate reality. A young woman,
|
138 |
+
institutionalized by her abusive stepfather, retreats into a vivid fantasy world
|
139 |
+
where she envisions a plan to escape. Gathering a group of fellow inmates, she
|
140 |
+
embarks on a quest to collect five mystical items, blurring the lines between
|
141 |
+
reality and imagination. Tagline: You will be unprepared.'
|
142 |
+
- 'Shaun of the Dead is a 2004 horror, comedy movie directed by Edgar Wright. Starring
|
143 |
+
Simon Pegg, Nick Frost, and Kate Ashfield, the story explores themes like london,
|
144 |
+
england, dark comedy, satire, surrey, parody, slacker, friends, survival, zombie,
|
145 |
+
cynical, survival horror, british pub, boyfriend girlfriend relationship, taunting,
|
146 |
+
zombie apocalypse, frantic, satirical, desperate, anxious, playful, dramatic,
|
147 |
+
suspenseful, witty, amused, defiant, exuberant, and farcical. Shaun lives a supremely
|
148 |
+
uneventful life, which revolves around his girlfriend, his mother, and, above
|
149 |
+
all, his local pub. This gentle routine is threatened when the dead return to
|
150 |
+
life and make strenuous attempts to snack on ordinary Londoners. Tagline: A romantic
|
151 |
+
comedy. With zombies.'
|
152 |
+
- 'Jaws 2 is a 1978 horror, thriller movie directed by Jeannot Szwarc. Starring
|
153 |
+
Roy Scheider, Lorraine Gary, and Murray Hamilton, the story explores themes like
|
154 |
+
dying and death, rescue, island, panic, mayor, shark attack, police chief, current,
|
155 |
+
boat accident, animal attack, sailing, sequel, scuba diving, creature, shark,
|
156 |
+
great white shark, killer whale, high-tension current, water skiing, and tourism.
|
157 |
+
Police chief Brody must protect the citizens of Amity after a second monstrous
|
158 |
+
shark begins terrorizing the waters. Tagline: Just when you thought it was safe
|
159 |
+
to go back in the water...'
|
160 |
+
pipeline_tag: sentence-similarity
|
161 |
+
library_name: sentence-transformers
|
162 |
+
---
|
163 |
+
|
164 |
+
# SentenceTransformer based on BAAI/bge-base-en-v1.5
|
165 |
+
|
166 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
167 |
+
|
168 |
+
## Model Details
|
169 |
+
|
170 |
+
### Model Description
|
171 |
+
- **Model Type:** Sentence Transformer
|
172 |
+
- **Base model:** [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) <!-- at revision a5beb1e3e68b9ab74eb54cfd186867f64f240e1a -->
|
173 |
+
- **Maximum Sequence Length:** 512 tokens
|
174 |
+
- **Output Dimensionality:** 768 dimensions
|
175 |
+
- **Similarity Function:** Cosine Similarity
|
176 |
+
<!-- - **Training Dataset:** Unknown -->
|
177 |
+
<!-- - **Language:** Unknown -->
|
178 |
+
<!-- - **License:** Unknown -->
|
179 |
+
|
180 |
+
### Model Sources
|
181 |
+
|
182 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
183 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
184 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
185 |
+
|
186 |
+
### Full Model Architecture
|
187 |
+
|
188 |
+
```
|
189 |
+
SentenceTransformer(
|
190 |
+
(0): Transformer({'max_seq_length': 512, 'do_lower_case': True}) with Transformer model: BertModel
|
191 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
192 |
+
(2): Normalize()
|
193 |
+
)
|
194 |
+
```
|
195 |
+
|
196 |
+
## Usage
|
197 |
+
|
198 |
+
### Direct Usage (Sentence Transformers)
|
199 |
+
|
200 |
+
First install the Sentence Transformers library:
|
201 |
+
|
202 |
+
```bash
|
203 |
+
pip install -U sentence-transformers
|
204 |
+
```
|
205 |
+
|
206 |
+
Then you can load this model and run inference.
|
207 |
+
```python
|
208 |
+
from sentence_transformers import SentenceTransformer
|
209 |
+
|
210 |
+
# Download from the 🤗 Hub
|
211 |
+
model = SentenceTransformer("sentence_transformers_model_id")  # placeholder: replace with this model's Hub repository ID
|
212 |
+
# Run inference
|
213 |
+
sentences = [
|
214 |
+
'Looking for films with Emily Browning and Abbie Cornish in the main cast and centered around asylum',
|
215 |
+
'Sucker Punch is a 2011 action, fantasy, thriller movie directed by Zack Snyder. Starring Emily Browning, Abbie Cornish, and Jena Malone, the story explores themes like samurai, escape, asylum, brothel, sword fight, dragon, robot, inmate, lobotomy, war zone, abusive stepfather, rape culture, and alternate reality. A young woman, institutionalized by her abusive stepfather, retreats into a vivid fantasy world where she envisions a plan to escape. Gathering a group of fellow inmates, she embarks on a quest to collect five mystical items, blurring the lines between reality and imagination. Tagline: You will be unprepared.',
|
216 |
+
'Shaun of the Dead is a 2004 horror, comedy movie directed by Edgar Wright. Starring Simon Pegg, Nick Frost, and Kate Ashfield, the story explores themes like london, england, dark comedy, satire, surrey, parody, slacker, friends, survival, zombie, cynical, survival horror, british pub, boyfriend girlfriend relationship, taunting, zombie apocalypse, frantic, satirical, desperate, anxious, playful, dramatic, suspenseful, witty, amused, defiant, exuberant, and farcical. Shaun lives a supremely uneventful life, which revolves around his girlfriend, his mother, and, above all, his local pub. This gentle routine is threatened when the dead return to life and make strenuous attempts to snack on ordinary Londoners. Tagline: A romantic comedy. With zombies.',
|
217 |
+
]
|
218 |
+
embeddings = model.encode(sentences)
|
219 |
+
print(embeddings.shape)
|
220 |
+
# [3, 768]
|
221 |
+
|
222 |
+
# Get the similarity scores for the embeddings
|
223 |
+
similarities = model.similarity(embeddings, embeddings)
|
224 |
+
print(similarities.shape)
|
225 |
+
# [3, 3]
|
226 |
+
```
|
227 |
+
|
228 |
+
<!--
|
229 |
+
### Direct Usage (Transformers)
|
230 |
+
|
231 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
232 |
+
|
233 |
+
</details>
|
234 |
+
-->
|
235 |
+
|
236 |
+
<!--
|
237 |
+
### Downstream Usage (Sentence Transformers)
|
238 |
+
|
239 |
+
You can finetune this model on your own dataset.
|
240 |
+
|
241 |
+
<details><summary>Click to expand</summary>
|
242 |
+
|
243 |
+
</details>
|
244 |
+
-->
|
245 |
+
|
246 |
+
<!--
|
247 |
+
### Out-of-Scope Use
|
248 |
+
|
249 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
250 |
+
-->
|
251 |
+
|
252 |
+
<!--
|
253 |
+
## Bias, Risks and Limitations
|
254 |
+
|
255 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
256 |
+
-->
|
257 |
+
|
258 |
+
<!--
|
259 |
+
### Recommendations
|
260 |
+
|
261 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
262 |
+
-->
|
263 |
+
|
264 |
+
## Training Details
|
265 |
+
|
266 |
+
### Training Dataset
|
267 |
+
|
268 |
+
#### Unnamed Dataset
|
269 |
+
|
270 |
+
* Size: 32,400 training samples
|
271 |
+
* Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>sentence_2</code>
|
272 |
+
* Approximate statistics based on the first 1000 samples:
|
273 |
+
| | sentence_0 | sentence_1 | sentence_2 |
|
274 |
+
|:--------|:----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
|
275 |
+
| type | string | string | string |
|
276 |
+
| details | <ul><li>min: 8 tokens</li><li>mean: 16.48 tokens</li><li>max: 32 tokens</li></ul> | <ul><li>min: 36 tokens</li><li>mean: 147.71 tokens</li><li>max: 307 tokens</li></ul> | <ul><li>min: 46 tokens</li><li>mean: 149.19 tokens</li><li>max: 328 tokens</li></ul> |
|
277 |
+
* Samples:
|
278 |
+
| sentence_0 | sentence_1 | sentence_2 |
|
279 |
+
|:----------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
280 |
+
| <code>Critically acclaimed thriller movies directed by Sam Raimi exploring the themes of work and psychologist</code> | <code>Drag Me to Hell is a 2009 horror, thriller movie directed by Sam Raimi. Starring Alison Lohman, Justin Long, and Lorna Raver, the story explores themes like work, gypsy, gore, curse, psychologist, psychic, evil, loan officer, obituary, engagement ring, hilarious, and ghoulish. After denying a woman the extension she needs to keep her home, loan officer Christine Brown sees her once-promising life take a startling turn for the worse. Christine is convinced she's been cursed by a Gypsy, but her boyfriend is skeptical. Her only hope seems to lie in a psychic who claims he can help her lift the curse and keep her soul from being dragged straight to hell. Tagline: Christine Brown has a good job, a great boyfriend, and a bright future. But in three days, she's going to hell.</code> | <code>Waiting... is a 2005 comedy, romance movie directed by Rob McKittrick. Starring Ryan Reynolds, Anna Faris, and Justin Long, the story explores themes like decision, waiter, hostess, trainee, gross out, employer employee relationship, glass pipe, and screaming. Employees at a Bennigan's-like restaurant (called, creatively enough, Shenanigan's), kill time before their real lives get started. But while they wait, they'll have to deal with picky customers who want their steak cooked to order and enthusiastic managers who want to build the perfect wait staff. Luckily, these employees have effective revenge tactics. Tagline: What happens in the kitchen ends up on the plate.</code> |
|
281 |
+
| <code>Critically acclaimed comedy movies directed by Brian Brough exploring the themes of businessman and life</code> | <code>Beauty and the Billionaire is a 2022 tv movie, romance, comedy movie directed by Brian Brough. Starring Sashleigha Hightower, Chris Reid, and Tanner Gillman, the story explores themes like businessman. Addison travels with a picky billionaire as an assistant in her brother's place so he doesn't lose his job. She clashes with the billionaire until both start to see more in each other than they thought.</code> | <code>Nightwatch: Demons Are Forever is a 2023 horror, thriller, mystery movie directed by Ole Bornedal. Starring Fanny Leander Bornedal, Nikolaj Coster-Waldau, and Kim Bodnia, the story explores themes like sequel. Martin's daughter, Emma, takes up a night watch job to find out what happened to her parents almost thirty years ago. A meeting with Wörmer in his cell pulls the serial killer out of his coma and sets in motion a chain of fateful events.</code> |
|
282 |
+
| <code>Stories about finding belonging in unexpected places</code> | <code>Woody Woodpecker Goes to Camp is a 2024 family, comedy, animation movie directed by Jonathan A. Rosenbaum. Starring Eric Bauza, Kevin Michael Richardson, and Tom Kenny, the story explores themes like camping, woodpecker, and bird. After getting kicked out of the forest, Woody thinks he's found a forever home at Camp Woo Hoo — until an inspector threatens to shut down the camp.</code> | <code>Jacob's Ladder is a 1990 drama, mystery, horror movie directed by Adrian Lyne. Starring Tim Robbins, Elizabeth Peña, and Danny Aiello, the story explores themes like vietnam veteran, new york city, post-traumatic stress disorder (ptsd), experiment, nightmare, subway, 1970s, paranoia, hallucination, car bomb, grief, memory, chemist, demon, postal worker, figment of imagination, oneiric, and chiropractor. After returning home from the Vietnam War, veteran Jacob Singer struggles to maintain his sanity. Plagued by hallucinations and flashbacks, Singer rapidly falls apart as the world and people around him morph and twist into disturbing images. His girlfriend, Jezzie, and ex-wife, Sarah, try to help, but to little avail. Even Singer's chiropractor friend, Louis, fails to reach him as he descends into madness. Tagline: The most frightening thing about Jacob Singer's nightmare is that he isn't dreaming.</code> |
|
283 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
284 |
+
```json
|
285 |
+
{
|
286 |
+
"scale": 20.0,
|
287 |
+
"similarity_fct": "cos_sim"
|
288 |
+
}
|
289 |
+
```
|
290 |
+
|
291 |
+
### Training Hyperparameters
|
292 |
+
#### Non-Default Hyperparameters
|
293 |
+
|
294 |
+
- `per_device_train_batch_size`: 32
|
295 |
+
- `per_device_eval_batch_size`: 32
|
296 |
+
- `num_train_epochs`: 4
|
297 |
+
- `multi_dataset_batch_sampler`: round_robin
|
298 |
+
|
299 |
+
#### All Hyperparameters
|
300 |
+
<details><summary>Click to expand</summary>
|
301 |
+
|
302 |
+
- `overwrite_output_dir`: False
|
303 |
+
- `do_predict`: False
|
304 |
+
- `eval_strategy`: no
|
305 |
+
- `prediction_loss_only`: True
|
306 |
+
- `per_device_train_batch_size`: 32
|
307 |
+
- `per_device_eval_batch_size`: 32
|
308 |
+
- `per_gpu_train_batch_size`: None
|
309 |
+
- `per_gpu_eval_batch_size`: None
|
310 |
+
- `gradient_accumulation_steps`: 1
|
311 |
+
- `eval_accumulation_steps`: None
|
312 |
+
- `torch_empty_cache_steps`: None
|
313 |
+
- `learning_rate`: 5e-05
|
314 |
+
- `weight_decay`: 0.0
|
315 |
+
- `adam_beta1`: 0.9
|
316 |
+
- `adam_beta2`: 0.999
|
317 |
+
- `adam_epsilon`: 1e-08
|
318 |
+
- `max_grad_norm`: 1
|
319 |
+
- `num_train_epochs`: 4
|
320 |
+
- `max_steps`: -1
|
321 |
+
- `lr_scheduler_type`: linear
|
322 |
+
- `lr_scheduler_kwargs`: {}
|
323 |
+
- `warmup_ratio`: 0.0
|
324 |
+
- `warmup_steps`: 0
|
325 |
+
- `log_level`: passive
|
326 |
+
- `log_level_replica`: warning
|
327 |
+
- `log_on_each_node`: True
|
328 |
+
- `logging_nan_inf_filter`: True
|
329 |
+
- `save_safetensors`: True
|
330 |
+
- `save_on_each_node`: False
|
331 |
+
- `save_only_model`: False
|
332 |
+
- `restore_callback_states_from_checkpoint`: False
|
333 |
+
- `no_cuda`: False
|
334 |
+
- `use_cpu`: False
|
335 |
+
- `use_mps_device`: False
|
336 |
+
- `seed`: 42
|
337 |
+
- `data_seed`: None
|
338 |
+
- `jit_mode_eval`: False
|
339 |
+
- `use_ipex`: False
|
340 |
+
- `bf16`: False
|
341 |
+
- `fp16`: False
|
342 |
+
- `fp16_opt_level`: O1
|
343 |
+
- `half_precision_backend`: auto
|
344 |
+
- `bf16_full_eval`: False
|
345 |
+
- `fp16_full_eval`: False
|
346 |
+
- `tf32`: None
|
347 |
+
- `local_rank`: 0
|
348 |
+
- `ddp_backend`: None
|
349 |
+
- `tpu_num_cores`: None
|
350 |
+
- `tpu_metrics_debug`: False
|
351 |
+
- `debug`: []
|
352 |
+
- `dataloader_drop_last`: False
|
353 |
+
- `dataloader_num_workers`: 0
|
354 |
+
- `dataloader_prefetch_factor`: None
|
355 |
+
- `past_index`: -1
|
356 |
+
- `disable_tqdm`: False
|
357 |
+
- `remove_unused_columns`: True
|
358 |
+
- `label_names`: None
|
359 |
+
- `load_best_model_at_end`: False
|
360 |
+
- `ignore_data_skip`: False
|
361 |
+
- `fsdp`: []
|
362 |
+
- `fsdp_min_num_params`: 0
|
363 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
364 |
+
- `tp_size`: 0
|
365 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
366 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
367 |
+
- `deepspeed`: None
|
368 |
+
- `label_smoothing_factor`: 0.0
|
369 |
+
- `optim`: adamw_torch
|
370 |
+
- `optim_args`: None
|
371 |
+
- `adafactor`: False
|
372 |
+
- `group_by_length`: False
|
373 |
+
- `length_column_name`: length
|
374 |
+
- `ddp_find_unused_parameters`: None
|
375 |
+
- `ddp_bucket_cap_mb`: None
|
376 |
+
- `ddp_broadcast_buffers`: False
|
377 |
+
- `dataloader_pin_memory`: True
|
378 |
+
- `dataloader_persistent_workers`: False
|
379 |
+
- `skip_memory_metrics`: True
|
380 |
+
- `use_legacy_prediction_loop`: False
|
381 |
+
- `push_to_hub`: False
|
382 |
+
- `resume_from_checkpoint`: None
|
383 |
+
- `hub_model_id`: None
|
384 |
+
- `hub_strategy`: every_save
|
385 |
+
- `hub_private_repo`: None
|
386 |
+
- `hub_always_push`: False
|
387 |
+
- `gradient_checkpointing`: False
|
388 |
+
- `gradient_checkpointing_kwargs`: None
|
389 |
+
- `include_inputs_for_metrics`: False
|
390 |
+
- `include_for_metrics`: []
|
391 |
+
- `eval_do_concat_batches`: True
|
392 |
+
- `fp16_backend`: auto
|
393 |
+
- `push_to_hub_model_id`: None
|
394 |
+
- `push_to_hub_organization`: None
|
395 |
+
- `mp_parameters`:
|
396 |
+
- `auto_find_batch_size`: False
|
397 |
+
- `full_determinism`: False
|
398 |
+
- `torchdynamo`: None
|
399 |
+
- `ray_scope`: last
|
400 |
+
- `ddp_timeout`: 1800
|
401 |
+
- `torch_compile`: False
|
402 |
+
- `torch_compile_backend`: None
|
403 |
+
- `torch_compile_mode`: None
|
404 |
+
- `include_tokens_per_second`: False
|
405 |
+
- `include_num_input_tokens_seen`: False
|
406 |
+
- `neftune_noise_alpha`: None
|
407 |
+
- `optim_target_modules`: None
|
408 |
+
- `batch_eval_metrics`: False
|
409 |
+
- `eval_on_start`: False
|
410 |
+
- `use_liger_kernel`: False
|
411 |
+
- `eval_use_gather_object`: False
|
412 |
+
- `average_tokens_across_devices`: False
|
413 |
+
- `prompts`: None
|
414 |
+
- `batch_sampler`: batch_sampler
|
415 |
+
- `multi_dataset_batch_sampler`: round_robin
|
416 |
+
|
417 |
+
</details>
|
418 |
+
|
419 |
+
### Training Logs
|
420 |
+
| Epoch | Step | Training Loss |
|
421 |
+
|:------:|:----:|:-------------:|
|
422 |
+
| 0.4936 | 500 | 0.8319 |
|
423 |
+
| 0.9872 | 1000 | 0.553 |
|
424 |
+
| 1.4808 | 1500 | 0.4125 |
|
425 |
+
| 1.9743 | 2000 | 0.4022 |
|
426 |
+
| 2.4679 | 2500 | 0.3187 |
|
427 |
+
| 2.9615 | 3000 | 0.3098 |
|
428 |
+
| 3.4551 | 3500 | 0.2672 |
|
429 |
+
| 3.9487 | 4000 | 0.2613 |
|
430 |
+
|
431 |
+
|
432 |
+
### Framework Versions
|
433 |
+
- Python: 3.11.12
|
434 |
+
- Sentence Transformers: 3.4.1
|
435 |
+
- Transformers: 4.51.3
|
436 |
+
- PyTorch: 2.6.0+cu124
|
437 |
+
- Accelerate: 1.6.0
|
438 |
+
- Datasets: 3.6.0
|
439 |
+
- Tokenizers: 0.21.1
|
440 |
+
|
441 |
+
## Citation
|
442 |
+
|
443 |
+
### BibTeX
|
444 |
+
|
445 |
+
#### Sentence Transformers
|
446 |
+
```bibtex
|
447 |
+
@inproceedings{reimers-2019-sentence-bert,
|
448 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
449 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
450 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
451 |
+
month = "11",
|
452 |
+
year = "2019",
|
453 |
+
publisher = "Association for Computational Linguistics",
|
454 |
+
url = "https://arxiv.org/abs/1908.10084",
|
455 |
+
}
|
456 |
+
```
|
457 |
+
|
458 |
+
#### MultipleNegativesRankingLoss
|
459 |
+
```bibtex
|
460 |
+
@misc{henderson2017efficient,
|
461 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
462 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
463 |
+
year={2017},
|
464 |
+
eprint={1705.00652},
|
465 |
+
archivePrefix={arXiv},
|
466 |
+
primaryClass={cs.CL}
|
467 |
+
}
|
468 |
+
```
|
469 |
+
|
470 |
+
<!--
|
471 |
+
## Glossary
|
472 |
+
|
473 |
+
*Clearly define terms in order to be accessible across audiences.*
|
474 |
+
-->
|
475 |
+
|
476 |
+
<!--
|
477 |
+
## Model Card Authors
|
478 |
+
|
479 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
480 |
+
-->
|
481 |
+
|
482 |
+
<!--
|
483 |
+
## Model Card Contact
|
484 |
+
|
485 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
486 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"BertModel"
|
4 |
+
],
|
5 |
+
"attention_probs_dropout_prob": 0.1,
|
6 |
+
"classifier_dropout": null,
|
7 |
+
"gradient_checkpointing": false,
|
8 |
+
"hidden_act": "gelu",
|
9 |
+
"hidden_dropout_prob": 0.1,
|
10 |
+
"hidden_size": 768,
|
11 |
+
"id2label": {
|
12 |
+
"0": "LABEL_0"
|
13 |
+
},
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 3072,
|
16 |
+
"label2id": {
|
17 |
+
"LABEL_0": 0
|
18 |
+
},
|
19 |
+
"layer_norm_eps": 1e-12,
|
20 |
+
"max_position_embeddings": 512,
|
21 |
+
"model_type": "bert",
|
22 |
+
"num_attention_heads": 12,
|
23 |
+
"num_hidden_layers": 12,
|
24 |
+
"pad_token_id": 0,
|
25 |
+
"position_embedding_type": "absolute",
|
26 |
+
"torch_dtype": "float32",
|
27 |
+
"transformers_version": "4.51.3",
|
28 |
+
"type_vocab_size": 2,
|
29 |
+
"use_cache": true,
|
30 |
+
"vocab_size": 30522
|
31 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.4.1",
|
4 |
+
"transformers": "4.51.3",
|
5 |
+
"pytorch": "2.6.0+cu124"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": "cosine"
|
10 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf31f501d4199b81c99d16a5677e682638602d84471b402f378a74441b4afc3c
|
3 |
+
size 437951328
|
modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 512,
|
3 |
+
"do_lower_case": true
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"extra_special_tokens": {},
|
49 |
+
"mask_token": "[MASK]",
|
50 |
+
"model_max_length": 512,
|
51 |
+
"never_split": null,
|
52 |
+
"pad_token": "[PAD]",
|
53 |
+
"sep_token": "[SEP]",
|
54 |
+
"strip_accents": null,
|
55 |
+
"tokenize_chinese_chars": true,
|
56 |
+
"tokenizer_class": "BertTokenizer",
|
57 |
+
"unk_token": "[UNK]"
|
58 |
+
}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|