Upload folder using huggingface_hub
Browse files- README.md +270 -164
- config.json +17 -20
- model.safetensors +2 -2
- tokenizer.json +2 -4
- tokenizer_config.json +4 -9
README.md
CHANGED
@@ -4,159 +4,265 @@ tags:
|
|
4 |
- sentence-similarity
|
5 |
- feature-extraction
|
6 |
- generated_from_trainer
|
7 |
-
- dataset_size:
|
8 |
- loss:MultipleNegativesRankingLoss
|
9 |
base_model: BAAI/bge-base-en-v1.5
|
10 |
widget:
|
11 |
-
- source_sentence:
|
12 |
-
|
13 |
sentences:
|
14 |
-
- '
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
sentences:
|
45 |
-
- '
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
sentences:
|
70 |
-
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
|
|
|
|
|
|
98 |
sentences:
|
99 |
-
- '
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
sentences:
|
134 |
-
- '
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
pipeline_tag: sentence-similarity
|
161 |
library_name: sentence-transformers
|
162 |
---
|
@@ -211,9 +317,9 @@ from sentence_transformers import SentenceTransformer
|
|
211 |
model = SentenceTransformer("sentence_transformers_model_id")
|
212 |
# Run inference
|
213 |
sentences = [
|
214 |
-
'
|
215 |
-
|
216 |
-
|
217 |
]
|
218 |
embeddings = model.encode(sentences)
|
219 |
print(embeddings.shape)
|
@@ -267,19 +373,19 @@ You can finetune this model on your own dataset.
|
|
267 |
|
268 |
#### Unnamed Dataset
|
269 |
|
270 |
-
* Size: 32,
|
271 |
* Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>sentence_2</code>
|
272 |
* Approximate statistics based on the first 1000 samples:
|
273 |
| | sentence_0 | sentence_1 | sentence_2 |
|
274 |
|:--------|:----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
|
275 |
| type | string | string | string |
|
276 |
-
| details | <ul><li>min: 8 tokens</li><li>mean: 16.
|
277 |
* Samples:
|
278 |
-
| sentence_0
|
279 |
-
|
280 |
-
| <code>
|
281 |
-
| <code>
|
282 |
-
| <code>
|
283 |
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
284 |
```json
|
285 |
{
|
@@ -419,14 +525,14 @@ You can finetune this model on your own dataset.
|
|
419 |
### Training Logs
|
420 |
| Epoch | Step | Training Loss |
|
421 |
|:------:|:----:|:-------------:|
|
422 |
-
| 0.
|
423 |
-
| 0.
|
424 |
-
| 1.
|
425 |
-
| 1.
|
426 |
-
| 2.
|
427 |
-
| 2.
|
428 |
-
| 3.
|
429 |
-
| 3.
|
430 |
|
431 |
|
432 |
### Framework Versions
|
@@ -435,7 +541,7 @@ You can finetune this model on your own dataset.
|
|
435 |
- Transformers: 4.51.3
|
436 |
- PyTorch: 2.6.0+cu124
|
437 |
- Accelerate: 1.6.0
|
438 |
-
- Datasets: 3.
|
439 |
- Tokenizers: 0.21.1
|
440 |
|
441 |
## Citation
|
|
|
4 |
- sentence-similarity
|
5 |
- feature-extraction
|
6 |
- generated_from_trainer
|
7 |
+
- dataset_size:32382
|
8 |
- loss:MultipleNegativesRankingLoss
|
9 |
base_model: BAAI/bge-base-en-v1.5
|
10 |
widget:
|
11 |
+
- source_sentence: What are some must-watch animation films from the 2000s reflecting
|
12 |
+
on dying and death and loss of loved one
|
13 |
sentences:
|
14 |
+
- 'Title: Che: Part One
|
15 |
+
|
16 |
+
Genres: Drama, History, War
|
17 |
+
|
18 |
+
Overview: The Argentine, begins as Che and a band of Cuban exiles (led by Fidel
|
19 |
+
Castro) reach the Cuban shore from Mexico in 1956. Within two years, they mobilized
|
20 |
+
popular support and an army and toppled the U.S.-friendly regime of dictator Fulgencio
|
21 |
+
Batista.
|
22 |
+
|
23 |
+
Tagline: Everyone knows the icon. Few know the man.
|
24 |
+
|
25 |
+
Director: Steven Soderbergh
|
26 |
+
|
27 |
+
Stars: Benicio del Toro, Demián Bichir, Santiago Cabrera
|
28 |
+
|
29 |
+
Release Date: 2008-09-05
|
30 |
+
|
31 |
+
Keywords: hero, central intelligence agency (cia), cuba, biography, che guevara,
|
32 |
+
fidel castro, cuban revolution, 1950s'
|
33 |
+
- 'Title: Ice Age
|
34 |
+
|
35 |
+
Genres: Animation, Comedy, Family, Adventure
|
36 |
+
|
37 |
+
Overview: With the impending ice age almost upon them, a mismatched trio of prehistoric
|
38 |
+
critters – Manny the woolly mammoth, Diego the saber-toothed tiger and Sid the
|
39 |
+
giant sloth – find an orphaned infant and decide to return it to its human parents.
|
40 |
+
Along the way, the unlikely allies become friends but, when enemies attack, their
|
41 |
+
quest takes on far nobler aims.
|
42 |
+
|
43 |
+
Tagline: They came. They thawed. They conquered.
|
44 |
+
|
45 |
+
Director: Chris Wedge
|
46 |
+
|
47 |
+
Stars: Ray Romano, John Leguizamo, Denis Leary
|
48 |
+
|
49 |
+
Release Date: 2002-03-10
|
50 |
+
|
51 |
+
Keywords: dying and death, human evolution, parent child relationship, squirrel,
|
52 |
+
loss of loved one, mammoth, sloth, villain, stone age, prehistory, prehistoric
|
53 |
+
creature, saber-toothed tiger, cavemen, road movie, neanderthal, prehistoric man,
|
54 |
+
dodo bird, nut, ground sloth, cheerful'
|
55 |
+
- 'Title: Castle in the Sky
|
56 |
+
|
57 |
+
Genres: Adventure, Fantasy, Animation, Action, Family
|
58 |
+
|
59 |
+
Overview: A young boy and a girl with a magic crystal must race against pirates
|
60 |
+
and foreign agents in a search for a legendary floating castle.
|
61 |
+
|
62 |
+
Tagline: One day, a girl came down from the sky…
|
63 |
+
|
64 |
+
Director: Hayao Miyazaki
|
65 |
+
|
66 |
+
Stars: Keiko Yokozawa, Mayumi Tanaka, Minori Terada
|
67 |
+
|
68 |
+
Release Date: 1986-08-02
|
69 |
+
|
70 |
+
Keywords: army, flying, magic, mine, castle, lost civilisation, pirate, orphan,
|
71 |
+
government agent, floating, pendant, blue sky, air pirate, crystal, anime, adventure,
|
72 |
+
amused'
|
73 |
+
- source_sentence: Stories of desperate characters lured into a life of crime for
|
74 |
+
financial gain.
|
75 |
sentences:
|
76 |
+
- 'Title: Emily the Criminal
|
77 |
+
|
78 |
+
Genres: Crime, Drama, Thriller
|
79 |
+
|
80 |
+
Overview: Desperate for income, Emily takes a shady gig buying goods with stolen
|
81 |
+
credit cards supplied by a charismatic middleman named Youcef. Seduced by the
|
82 |
+
quick cash and illicit thrills, they hatch a plan to take their business to the
|
83 |
+
next level.
|
84 |
+
|
85 |
+
Tagline: High risks come with even higher rewards.
|
86 |
+
|
87 |
+
Director: John Patton Ford
|
88 |
+
|
89 |
+
Stars: Aubrey Plaza, Theo Rossi, Megalyn Echikunwoke
|
90 |
+
|
91 |
+
Release Date: 2022-08-12
|
92 |
+
|
93 |
+
Keywords: job interview, organized crime, los angeles, california, criminal underworld,
|
94 |
+
credit card fraud, criminal record, food delivery, student debt'
|
95 |
+
- 'Title: The Mummy
|
96 |
+
|
97 |
+
Genres: Adventure, Action, Fantasy
|
98 |
+
|
99 |
+
Overview: Dashing legionnaire Rick O''Connell stumbles upon the hidden ruins of
|
100 |
+
Hamunaptra while in the midst of a battle to claim the area in 1920s Egypt. It
|
101 |
+
has been over three thousand years since former High Priest Imhotep suffered a
|
102 |
+
fate worse than death as a punishment for a forbidden love—along with a curse
|
103 |
+
that guarantees eternal doom upon the world if he is ever awoken.
|
104 |
+
|
105 |
+
Tagline: The sands will rise. The heavens will part. The power will be unleashed.
|
106 |
+
|
107 |
+
Director: Stephen Sommers
|
108 |
+
|
109 |
+
Stars: Brendan Fraser, Rachel Weisz, John Hannah
|
110 |
+
|
111 |
+
Release Date: 1999-04-16
|
112 |
+
|
113 |
+
Keywords: egypt, cairo, library, secret passage, pastor, pyramid, sandstorm, solar
|
114 |
+
eclipse, mummy, foreign legion, nile, secret society, treasure hunt, remake, archaeologist,
|
115 |
+
tomb, book of the dead, ancient egypt, opposites attract, 1920s, pharoah, good
|
116 |
+
versus evil'
|
117 |
+
- 'Title: Reality Bites
|
118 |
+
|
119 |
+
Genres: Drama, Romance, Comedy
|
120 |
+
|
121 |
+
Overview: A small circle of friends suffering from post-collegiate blues must
|
122 |
+
confront the hard truth about life, love and the pursuit of gainful employment.
|
123 |
+
As they struggle to map out survival guides for the future, the Gen-X quartet
|
124 |
+
soon begins to realize that reality isn''t all it''s cracked up to be.
|
125 |
+
|
126 |
+
Tagline: A comedy about love in the ''90s
|
127 |
+
|
128 |
+
Director: Ben Stiller
|
129 |
+
|
130 |
+
Stars: Winona Ryder, Ethan Hawke, Janeane Garofalo
|
131 |
+
|
132 |
+
Release Date: 1994-02-18
|
133 |
+
|
134 |
+
Keywords: yuppie, roommates, generations conflict, cohabitant, cabriolet, unemployed'
|
135 |
+
- source_sentence: Heartwarming animated dramas about friendship and ambition.
|
136 |
sentences:
|
137 |
+
- "Title: Trapezium\nGenres: Animation, Drama, Music\nOverview: High school student\
|
138 |
+
\ Yu Azuma will do whatever it takes to become an idol. Ready to make her dream\
|
139 |
+
\ a reality, she recruits three girls from the four corners of her prefecture.\
|
140 |
+
\ But the road to stardom hides unexpected trials.\nTagline: \nDirector: Masahiro\
|
141 |
+
\ Shinohara\nStars: Asaki Yuikawa, Hina Youmiya, Reina Ueda\nRelease Date: 2024-05-10\n\
|
142 |
+
Keywords: based on novel or book, anime, idol group, idol"
|
143 |
+
- "Title: Jaat\nGenres: Action, Drama\nOverview: After a ruffian accidentally ruins\
|
144 |
+
\ his meal, a traveler retorts violently and demands an apology, unintentionally\
|
145 |
+
\ finding himself in a web of violence, crime and corruption spun by a feared\
|
146 |
+
\ criminal.\nTagline: \nDirector: Gopichand Malineni\nStars: Sunny Deol, Randeep\
|
147 |
+
\ Hooda, Saiyami Kher\nRelease Date: 2025-04-10\nKeywords: bollywood"
|
148 |
+
- 'Title: Terminator: Dark Fate
|
149 |
+
|
150 |
+
Genres: Science Fiction, Action, Adventure, Thriller
|
151 |
+
|
152 |
+
Overview: Decades after Sarah Connor prevented Judgment Day, a lethal new Terminator
|
153 |
+
is sent to eliminate the future leader of the resistance. In a fight to save mankind,
|
154 |
+
battle-hardened Sarah Connor teams up with an unexpected ally and an enhanced
|
155 |
+
super soldier to stop the deadliest Terminator yet.
|
156 |
+
|
157 |
+
Tagline: Welcome to the day after judgement day
|
158 |
+
|
159 |
+
Director: Tim Miller
|
160 |
+
|
161 |
+
Stars: Linda Hamilton, Arnold Schwarzenegger, Mackenzie Davis
|
162 |
+
|
163 |
+
Release Date: 2019-10-23
|
164 |
+
|
165 |
+
Keywords: helicopter, mexico city, mexico, artificial intelligence (a.i.), cyborg,
|
166 |
+
dystopia, time travel, sequel, plane crash'
|
167 |
+
- source_sentence: Teen-centric horror movie with a chilling alien invasion plot
|
168 |
sentences:
|
169 |
+
- 'Title: The Faculty
|
170 |
+
|
171 |
+
Genres: Horror, Science Fiction
|
172 |
+
|
173 |
+
Overview: When some very creepy things start happening around school, the kids
|
174 |
+
at Herrington High make the chilling discovery that confirms their worst suspicions:
|
175 |
+
their teachers really are from another planet!
|
176 |
+
|
177 |
+
Tagline: Take me to your teacher.
|
178 |
+
|
179 |
+
Director: Robert Rodriguez
|
180 |
+
|
181 |
+
Stars: Josh Hartnett, Elijah Wood, Jordana Brewster
|
182 |
+
|
183 |
+
Release Date: 1998-12-25
|
184 |
+
|
185 |
+
Keywords: drug dealer, high school, homophobia, paranoia, alien, teacher, alien
|
186 |
+
invasion, drugs, alien infection, social status, parasite, creature feature, school
|
187 |
+
nurse, body snatchers, alien parasites, parasites, body horror, teenager, teen
|
188 |
+
scream'
|
189 |
+
- 'Title: Mystic River
|
190 |
+
|
191 |
+
Genres: Thriller, Crime, Drama, Mystery
|
192 |
+
|
193 |
+
Overview: The lives of three men who were childhood friends are shattered when
|
194 |
+
one of them suffers a family tragedy.
|
195 |
+
|
196 |
+
Tagline: We bury our sins, we wash them clean.
|
197 |
+
|
198 |
+
Director: Clint Eastwood
|
199 |
+
|
200 |
+
Stars: Sean Penn, Tim Robbins, Kevin Bacon
|
201 |
+
|
202 |
+
Release Date: 2003-10-07
|
203 |
+
|
204 |
+
Keywords: child abuse, sexual abuse, workers'' quarter, based on novel or book,
|
205 |
+
loss of loved one, suppressed past, boston, massachusetts, repayment, arbitrary
|
206 |
+
law, loyalty, massachusetts, whodunit, biting, guilt, childhood sexual abuse,
|
207 |
+
mysterious, grim, vengeance, poker race, sex abuse, forceful, ominous'
|
208 |
+
- 'Title: On Swift Horses
|
209 |
+
|
210 |
+
Genres: Drama, Romance
|
211 |
+
|
212 |
+
Overview: In the 1950s, a seemingly sensible newlywed and her wayward brother-in-law
|
213 |
+
undertake parallel journeys of risk, romance, and self-discovery.
|
214 |
+
|
215 |
+
Tagline: How much would you gamble for love?
|
216 |
+
|
217 |
+
Director: Daniel Minahan
|
218 |
+
|
219 |
+
Stars: Daisy Edgar-Jones, Jacob Elordi, Will Poulter
|
220 |
+
|
221 |
+
Release Date: 2025-04-24
|
222 |
+
|
223 |
+
Keywords: casino, based on novel or book, gambling, lesbian relationship, 1950s,
|
224 |
+
gay romance, gay relationship, same sex relationship, wistful, queer cinema, san
|
225 |
+
diego, lgbt history, queer history, lesbian couple, gay couple, romantic, ambiguous,
|
226 |
+
melodramatic, horse riding, queer love, queer romance, gay men, gay love story,
|
227 |
+
gay love, lgbtq, queer sexuality, lgbtq+'
|
228 |
+
- source_sentence: Movies about the dark side of Hollywood fame and power abuse
|
229 |
sentences:
|
230 |
+
- 'Title: Frances
|
231 |
+
|
232 |
+
Genres: Drama
|
233 |
+
|
234 |
+
Overview: The true story of Frances Farmer''s meteoric rise to fame in Hollywood
|
235 |
+
and the tragic turn her life took when she was blacklisted.
|
236 |
+
|
237 |
+
Tagline: Her story is shocking, disturbing, compelling... and true.
|
238 |
+
|
239 |
+
Director: Graeme Clifford
|
240 |
+
|
241 |
+
Stars: Jessica Lange, Sam Shepard, Kim Stanley
|
242 |
+
|
243 |
+
Release Date: 1982-12-03
|
244 |
+
|
245 |
+
Keywords: strong woman, falsely accused, insanity, movie business, feminism, biography,
|
246 |
+
based on true story, evil mother, psychiatric hospital, female protagonist, hollywood,
|
247 |
+
wrongful imprisonment, lost love, wrongful arrest, wrongful conviction, wrong
|
248 |
+
diagnosis, lobotomy, frances farmer, power abuse, mother daughter relationship'
|
249 |
+
- "Title: Come Drink with Me\nGenres: Action, Adventure\nOverview: Golden Swallow\
|
250 |
+
\ is a fighter-for-hire who has been contracted by the local government to retrieve\
|
251 |
+
\ the governor's kidnapped son. Holding him is a group of rebels who are demanding\
|
252 |
+
\ that their leader be released from prison in return for the captured son. After\
|
253 |
+
\ a brief encounter with the gang at a local restaurant, Golden Swallow is joined\
|
254 |
+
\ by an inebriated wanderer Drunken Cat who aids her in her mission.\nTagline:\
|
255 |
+
\ \nDirector: King Hu\nStars: Cheng Pei-Pei, Elliot Ngok Wah, Chen Hung-Lieh\n\
|
256 |
+
Release Date: 1966-04-07\nKeywords: kung fu, hero, showdown, kidnapping, warrior\
|
257 |
+
\ woman, gore, fistfight, forest, waterfall, murder, tough girl, monastery, heroine,\
|
258 |
+
\ inn, severed hand, wuxia, kung fu master, inner strength, beggar clan, tavern\
|
259 |
+
\ fight"
|
260 |
+
- "Title: Deva\nGenres: Action, Thriller, Mystery, Crime\nOverview: Dev Ambre, a\
|
261 |
+
\ ruthless cop, loses his memory in an accident just after he has finished solving\
|
262 |
+
\ a murder case and now has to reinvestigate it while keeping his memory loss\
|
263 |
+
\ a secret from everyone except DCP Farhan Khan.\nTagline: \nDirector: Rosshan\
|
264 |
+
\ Andrrews\nStars: Shahid Kapoor, Pooja Hegde, Pavail Gulati\nRelease Date: 2025-01-31\n\
|
265 |
+
Keywords: remake, based on movie, bollywood"
|
266 |
pipeline_tag: sentence-similarity
|
267 |
library_name: sentence-transformers
|
268 |
---
|
|
|
317 |
model = SentenceTransformer("sentence_transformers_model_id")
|
318 |
# Run inference
|
319 |
sentences = [
|
320 |
+
'Movies about the dark side of Hollywood fame and power abuse',
|
321 |
+
"Title: Frances\nGenres: Drama\nOverview: The true story of Frances Farmer's meteoric rise to fame in Hollywood and the tragic turn her life took when she was blacklisted.\nTagline: Her story is shocking, disturbing, compelling... and true.\nDirector: Graeme Clifford\nStars: Jessica Lange, Sam Shepard, Kim Stanley\nRelease Date: 1982-12-03\nKeywords: strong woman, falsely accused, insanity, movie business, feminism, biography, based on true story, evil mother, psychiatric hospital, female protagonist, hollywood, wrongful imprisonment, lost love, wrongful arrest, wrongful conviction, wrong diagnosis, lobotomy, frances farmer, power abuse, mother daughter relationship",
|
322 |
+
"Title: Come Drink with Me\nGenres: Action, Adventure\nOverview: Golden Swallow is a fighter-for-hire who has been contracted by the local government to retrieve the governor's kidnapped son. Holding him is a group of rebels who are demanding that their leader be released from prison in return for the captured son. After a brief encounter with the gang at a local restaurant, Golden Swallow is joined by an inebriated wanderer Drunken Cat who aids her in her mission.\nTagline: \nDirector: King Hu\nStars: Cheng Pei-Pei, Elliot Ngok Wah, Chen Hung-Lieh\nRelease Date: 1966-04-07\nKeywords: kung fu, hero, showdown, kidnapping, warrior woman, gore, fistfight, forest, waterfall, murder, tough girl, monastery, heroine, inn, severed hand, wuxia, kung fu master, inner strength, beggar clan, tavern fight",
|
323 |
]
|
324 |
embeddings = model.encode(sentences)
|
325 |
print(embeddings.shape)
|
|
|
373 |
|
374 |
#### Unnamed Dataset
|
375 |
|
376 |
+
* Size: 32,382 training samples
|
377 |
* Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>sentence_2</code>
|
378 |
* Approximate statistics based on the first 1000 samples:
|
379 |
| | sentence_0 | sentence_1 | sentence_2 |
|
380 |
|:--------|:----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
|
381 |
| type | string | string | string |
|
382 |
+
| details | <ul><li>min: 8 tokens</li><li>mean: 16.52 tokens</li><li>max: 38 tokens</li></ul> | <ul><li>min: 37 tokens</li><li>mean: 151.92 tokens</li><li>max: 330 tokens</li></ul> | <ul><li>min: 48 tokens</li><li>mean: 146.76 tokens</li><li>max: 301 tokens</li></ul> |
|
383 |
* Samples:
|
384 |
+
| sentence_0 | sentence_1 | sentence_2 |
|
385 |
+
|:-------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
386 |
+
| <code>Something like a drama story dealing with disturbed teenager or life</code> | <code>Title: I Never Promised You a Rose Garden<br>Genres: Drama<br>Overview: A disturbed and institutionalized 16-year-old girl struggles between fantasy and reality.<br>Tagline: When she tried to kill herself, it was just the beginning.<br>Director: Anthony Page<br>Stars: Kathleen Quinlan, Bibi Andersson, Ben Piazza<br>Release Date: 1977-07-14<br>Keywords: disturbed teenager</code> | <code>Title: Event Horizon<br>Genres: Horror, Science Fiction, Mystery<br>Overview: In 2047, a group of astronauts are sent to investigate and salvage the starship Event Horizon which disappeared mysteriously seven years before on its maiden voyage. However, it soon becomes evident that something sinister resides in its corridors.<br>Tagline: Infinite space. Infinite terror.<br>Director: Paul W. S. Anderson<br>Stars: Laurence Fishburne, Sam Neill, Kathleen Quinlan<br>Release Date: 1997-08-15<br>Keywords: space marine, nightmare, insanity, delusion, hallucination, space travel, cryogenics, gore, black hole, crew, flashback, evil spirit, alternate dimension, hellgate, religion, explosion, burning man, rescue team, super power, trapped in space, distress signal, 2040s, spaceship</code> |
|
387 |
+
| <code>Stories of brave musketeers fighting against powerful adversaries for justice and love</code> | <code>Title: The Three Musketeers<br>Genres: Action, Adventure, Romance, Family<br>Overview: The young D'Artagnan arrives in Paris with dreams of becoming a King's musketeer. He meets and quarrels with three men, Athos, Porthos, and Aramis, each of whom challenges him to a duel. D'Artagnan finds out they are musketeers and is invited to join them in their efforts to oppose Cardinal Richelieu, who wishes to increase his already considerable power over the King. D'Artagnan must also juggle affairs with the charming Constance Bonancieux and the passionate Lady De Winter, a secret agent for the Cardinal.<br>Tagline: . . . One for All and All for Fun!<br>Director: Richard Lester<br>Stars: Michael York, Oliver Reed, Richard Chamberlain<br>Release Date: 1973-12-11<br>Keywords: france, paris, france, based on novel or book, swordplay, fight, satire, dressmaker, louis xiii, sword fight, swordsman, musketeer, extramarital affair, swashbuckler, diamond theft, sword duel, diamond necklace, cardinal, 17th century, queen jewe...</code> | <code>Title: The Brood<br>Genres: Horror, Science Fiction<br>Overview: A man tries to uncover an unconventional psychologist's therapy techniques on his institutionalized wife, while a series of brutal attacks committed by a brood of mutant children coincides with the husband's investigation.<br>Tagline: The Ultimate Experience in Inner Terror.<br>Director: David Cronenberg<br>Stars: Oliver Reed, Samantha Eggar, Art Hindle<br>Release Date: 1979-05-25<br>Keywords: toronto, canada, mutant, transformation, psychologist, divorce, psychotherapist, canuxploitation</code> |
|
388 |
+
| <code>Critically acclaimed drama films directed by Sarah Polley exploring the themes of illiteracy and based on novel or book</code> | <code>Title: Women Talking<br>Genres: Drama<br>Overview: A group of women in an isolated religious colony struggle to reconcile their faith with a series of sexual assaults committed by the colony's men.<br>Tagline: Do nothing. Stay and fight. Leave.<br>Director: Sarah Polley<br>Stars: Rooney Mara, Claire Foy, Jessie Buckley<br>Release Date: 2022-12-23<br>Keywords: rape, based on novel or book, faith, illiteracy, bolivia, mennonites, religion, gang rape, teenage rape, meeting, duringcreditsstinger, woman director, sexual assault, abusive husband, 2000s, pregnancy from rape</code> | <code>Title: Alice in Wonderland<br>Genres: Family, Fantasy, Adventure<br>Overview: Alice, now 19 years old, returns to the whimsical world she first entered as a child and embarks on a journey to discover her true destiny.<br>Tagline: You're invited to a very important date.<br>Director: Tim Burton<br>Stars: Mia Wasikowska, Johnny Depp, Anne Hathaway<br>Release Date: 2010-03-03<br>Keywords: based on novel or book, queen, psychotic, fantasy world, taunting, live action remake, based on young adult novel, mischievous, absurd, dramatic, incredulous, amused, euphoric</code> |
|
389 |
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
390 |
```json
|
391 |
{
|
|
|
525 |
### Training Logs
|
526 |
| Epoch | Step | Training Loss |
|
527 |
|:------:|:----:|:-------------:|
|
528 |
+
| 0.4941 | 500 | 0.796 |
|
529 |
+
| 0.9881 | 1000 | 0.517 |
|
530 |
+
| 1.4822 | 1500 | 0.3748 |
|
531 |
+
| 1.9763 | 2000 | 0.3682 |
|
532 |
+
| 2.4704 | 2500 | 0.2839 |
|
533 |
+
| 2.9644 | 3000 | 0.2849 |
|
534 |
+
| 3.4585 | 3500 | 0.2392 |
|
535 |
+
| 3.9526 | 4000 | 0.2373 |
|
536 |
|
537 |
|
538 |
### Framework Versions
|
|
|
541 |
- Transformers: 4.51.3
|
542 |
- PyTorch: 2.6.0+cu124
|
543 |
- Accelerate: 1.6.0
|
544 |
+
- Datasets: 3.5.1
|
545 |
- Tokenizers: 0.21.1
|
546 |
|
547 |
## Citation
|
config.json
CHANGED
@@ -1,34 +1,31 @@
|
|
1 |
{
|
2 |
-
"activation": "gelu",
|
3 |
"architectures": [
|
4 |
-
"
|
5 |
],
|
6 |
-
"
|
7 |
-
"
|
8 |
-
"
|
9 |
-
"
|
|
|
|
|
10 |
"id2label": {
|
11 |
-
"0": "
|
12 |
-
"1": "recommendation",
|
13 |
-
"2": "factual"
|
14 |
},
|
15 |
"initializer_range": 0.02,
|
|
|
16 |
"label2id": {
|
17 |
-
"
|
18 |
-
"generic": 0,
|
19 |
-
"recommendation": 1
|
20 |
},
|
|
|
21 |
"max_position_embeddings": 512,
|
22 |
-
"model_type": "
|
23 |
-
"
|
24 |
-
"
|
25 |
"pad_token_id": 0,
|
26 |
-
"
|
27 |
-
"qa_dropout": 0.1,
|
28 |
-
"seq_classif_dropout": 0.2,
|
29 |
-
"sinusoidal_pos_embds": false,
|
30 |
-
"tie_weights_": true,
|
31 |
"torch_dtype": "float32",
|
32 |
"transformers_version": "4.51.3",
|
|
|
|
|
33 |
"vocab_size": 30522
|
34 |
}
|
|
|
1 |
{
|
|
|
2 |
"architectures": [
|
3 |
+
"BertModel"
|
4 |
],
|
5 |
+
"attention_probs_dropout_prob": 0.1,
|
6 |
+
"classifier_dropout": null,
|
7 |
+
"gradient_checkpointing": false,
|
8 |
+
"hidden_act": "gelu",
|
9 |
+
"hidden_dropout_prob": 0.1,
|
10 |
+
"hidden_size": 768,
|
11 |
"id2label": {
|
12 |
+
"0": "LABEL_0"
|
|
|
|
|
13 |
},
|
14 |
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 3072,
|
16 |
"label2id": {
|
17 |
+
"LABEL_0": 0
|
|
|
|
|
18 |
},
|
19 |
+
"layer_norm_eps": 1e-12,
|
20 |
"max_position_embeddings": 512,
|
21 |
+
"model_type": "bert",
|
22 |
+
"num_attention_heads": 12,
|
23 |
+
"num_hidden_layers": 12,
|
24 |
"pad_token_id": 0,
|
25 |
+
"position_embedding_type": "absolute",
|
|
|
|
|
|
|
|
|
26 |
"torch_dtype": "float32",
|
27 |
"transformers_version": "4.51.3",
|
28 |
+
"type_vocab_size": 2,
|
29 |
+
"use_cache": true,
|
30 |
"vocab_size": 30522
|
31 |
}
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1dc94bc439d0bf2b7099d1256849d930dc9044a172f401187307d00df9b9d3b7
|
3 |
+
size 437951328
|
tokenizer.json
CHANGED
@@ -2,14 +2,12 @@
|
|
2 |
"version": "1.0",
|
3 |
"truncation": {
|
4 |
"direction": "Right",
|
5 |
-
"max_length":
|
6 |
"strategy": "LongestFirst",
|
7 |
"stride": 0
|
8 |
},
|
9 |
"padding": {
|
10 |
-
"strategy":
|
11 |
-
"Fixed": 128
|
12 |
-
},
|
13 |
"direction": "Right",
|
14 |
"pad_to_multiple_of": null,
|
15 |
"pad_id": 0,
|
|
|
2 |
"version": "1.0",
|
3 |
"truncation": {
|
4 |
"direction": "Right",
|
5 |
+
"max_length": 512,
|
6 |
"strategy": "LongestFirst",
|
7 |
"stride": 0
|
8 |
},
|
9 |
"padding": {
|
10 |
+
"strategy": "BatchLongest",
|
|
|
|
|
11 |
"direction": "Right",
|
12 |
"pad_to_multiple_of": null,
|
13 |
"pad_id": 0,
|
tokenizer_config.json
CHANGED
@@ -41,23 +41,18 @@
|
|
41 |
"special": true
|
42 |
}
|
43 |
},
|
44 |
-
"clean_up_tokenization_spaces":
|
45 |
"cls_token": "[CLS]",
|
|
|
46 |
"do_lower_case": true,
|
47 |
"extra_special_tokens": {},
|
48 |
"mask_token": "[MASK]",
|
49 |
-
"max_length": 128,
|
50 |
"model_max_length": 512,
|
51 |
-
"
|
52 |
"pad_token": "[PAD]",
|
53 |
-
"pad_token_type_id": 0,
|
54 |
-
"padding_side": "right",
|
55 |
"sep_token": "[SEP]",
|
56 |
-
"stride": 0,
|
57 |
"strip_accents": null,
|
58 |
"tokenize_chinese_chars": true,
|
59 |
-
"tokenizer_class": "
|
60 |
-
"truncation_side": "right",
|
61 |
-
"truncation_strategy": "longest_first",
|
62 |
"unk_token": "[UNK]"
|
63 |
}
|
|
|
41 |
"special": true
|
42 |
}
|
43 |
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
"do_lower_case": true,
|
48 |
"extra_special_tokens": {},
|
49 |
"mask_token": "[MASK]",
|
|
|
50 |
"model_max_length": 512,
|
51 |
+
"never_split": null,
|
52 |
"pad_token": "[PAD]",
|
|
|
|
|
53 |
"sep_token": "[SEP]",
|
|
|
54 |
"strip_accents": null,
|
55 |
"tokenize_chinese_chars": true,
|
56 |
+
"tokenizer_class": "BertTokenizer",
|
|
|
|
|
57 |
"unk_token": "[UNK]"
|
58 |
}
|