abdouaziiz committed · verified · Commit cc7f087 · 1 Parent(s): 769511f

End of training

README.md CHANGED
@@ -4,11 +4,23 @@ license: apache-2.0
 base_model: openai/whisper-small
 tags:
 - generated_from_trainer
+datasets:
+- abdouaziiz/fulfulde_lam
 metrics:
 - wer
 model-index:
 - name: whisper-medium-v3-ff4
-  results: []
+  results:
+  - task:
+      name: Automatic Speech Recognition
+      type: automatic-speech-recognition
+    dataset:
+      name: abdouaziiz/fulfulde_lam
+      type: abdouaziiz/fulfulde_lam
+    metrics:
+    - name: Wer
+      type: wer
+      value: 0.16938691239432335
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -16,10 +28,10 @@ should probably proofread and complete it, then remove this comment. -->
 
 # whisper-medium-v3-ff4
 
-This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on an unknown dataset.
+This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the abdouaziiz/fulfulde_lam dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.2615
-- Wer: 0.1660
+- Loss: 0.2572
+- Wer: 0.1694
 
 ## Model description
 
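For readers of the updated model card, here is a minimal inference sketch. It assumes the checkpoint is published on the Hub under the repo id `abdouaziiz/whisper-medium-v3-ff4` (inferred from the model name; this commit does not state the final repo id) and that `transformers` and `evaluate` are installed; the metric is the same WER reported above.

```python
# Hedged sketch: the repo id and audio path below are assumptions, not taken from this commit.
from transformers import pipeline
import evaluate

asr = pipeline(
    "automatic-speech-recognition",
    model="abdouaziiz/whisper-medium-v3-ff4",  # assumed Hub repo id
)

# Transcribe one 16 kHz audio file (placeholder path).
prediction = asr("sample_fulfulde.wav")["text"]

# Score against a reference transcript with the same metric as the card (WER).
wer = evaluate.load("wer")
print(wer.compute(predictions=[prediction], references=["reference transcript"]))
```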
all_results.json ADDED
@@ -0,0 +1,15 @@
+{
+    "epoch": 3.3243830712185143,
+    "eval_loss": 0.25722527503967285,
+    "eval_runtime": 1093.9822,
+    "eval_samples": 3468,
+    "eval_samples_per_second": 3.17,
+    "eval_steps_per_second": 0.397,
+    "eval_wer": 0.16938691239432335,
+    "total_flos": 6.002316599795712e+19,
+    "train_loss": 0.2320793916262113,
+    "train_runtime": 50624.8997,
+    "train_samples": 62565,
+    "train_samples_per_second": 4.109,
+    "train_steps_per_second": 0.514
+}
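As a quick sanity check, the derived throughput fields above are consistent with the raw counts and runtimes; the small sketch below (plain arithmetic, no dependencies) reproduces them, assuming an effective batch size of 8 (the `train_batch_size` recorded in trainer_state.json, single device, no gradient accumulation).

```python
# Recompute the derived throughput figures in all_results.json from its raw fields.
eval_samples, eval_runtime = 3468, 1093.9822
train_runtime, max_steps, batch_size = 50624.8997, 26000, 8  # batch size 8 is an assumption

print(round(eval_samples / eval_runtime, 2))             # 3.17  -> eval_samples_per_second
print(round(batch_size * max_steps / train_runtime, 3))  # 4.109 -> train_samples_per_second
print(round(max_steps / train_runtime, 3))               # 0.514 -> train_steps_per_second
```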
eval_results.json ADDED
@@ -0,0 +1,9 @@
+{
+    "epoch": 3.3243830712185143,
+    "eval_loss": 0.25722527503967285,
+    "eval_runtime": 1093.9822,
+    "eval_samples": 3468,
+    "eval_samples_per_second": 3.17,
+    "eval_steps_per_second": 0.397,
+    "eval_wer": 0.16938691239432335
+}
runs/Apr27_16-19-19_yastraining/events.out.tfevents.1745824619.yastraining.419215.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75be722ad2b0966f0637dfb922bedae0424b482f3596b92d4508b32f049dc67f
+size 412
train_results.json ADDED
@@ -0,0 +1,9 @@
+{
+    "epoch": 3.3243830712185143,
+    "total_flos": 6.002316599795712e+19,
+    "train_loss": 0.2320793916262113,
+    "train_runtime": 50624.8997,
+    "train_samples": 62565,
+    "train_samples_per_second": 4.109,
+    "train_steps_per_second": 0.514
+}
trainer_state.json ADDED
@@ -0,0 +1,460 @@
+{
+  "best_metric": 0.25722527503967285,
+  "best_model_checkpoint": "whisper-medium-v3-ff4/checkpoint-20000",
+  "epoch": 3.3243830712185143,
+  "eval_steps": 5000,
+  "global_step": 26000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.06393044367727913,
+      "grad_norm": 8.506481170654297,
+      "learning_rate": 1.965626204238921e-05,
+      "loss": 0.9546,
+      "step": 500
+    },
+    {
+      "epoch": 0.12786088735455825,
+      "grad_norm": 9.10377025604248,
+      "learning_rate": 1.9270905587668595e-05,
+      "loss": 0.5883,
+      "step": 1000
+    },
+    {
+      "epoch": 0.19179133103183735,
+      "grad_norm": 6.146899223327637,
+      "learning_rate": 1.888554913294798e-05,
+      "loss": 0.5081,
+      "step": 1500
+    },
+    {
+      "epoch": 0.2557217747091165,
+      "grad_norm": 4.1534929275512695,
+      "learning_rate": 1.8500192678227363e-05,
+      "loss": 0.4631,
+      "step": 2000
+    },
+    {
+      "epoch": 0.3196522183863956,
+      "grad_norm": 5.8231587409973145,
+      "learning_rate": 1.8114836223506743e-05,
+      "loss": 0.4313,
+      "step": 2500
+    },
+    {
+      "epoch": 0.3835826620636747,
+      "grad_norm": 7.882592678070068,
+      "learning_rate": 1.772947976878613e-05,
+      "loss": 0.417,
+      "step": 3000
+    },
+    {
+      "epoch": 0.4475131057409538,
+      "grad_norm": 5.645440101623535,
+      "learning_rate": 1.7344123314065514e-05,
+      "loss": 0.4162,
+      "step": 3500
+    },
+    {
+      "epoch": 0.511443549418233,
+      "grad_norm": 4.277677536010742,
+      "learning_rate": 1.6958766859344894e-05,
+      "loss": 0.378,
+      "step": 4000
+    },
+    {
+      "epoch": 0.5753739930955121,
+      "grad_norm": 4.594604969024658,
+      "learning_rate": 1.6573410404624278e-05,
+      "loss": 0.3653,
+      "step": 4500
+    },
+    {
+      "epoch": 0.6393044367727913,
+      "grad_norm": 6.918728351593018,
+      "learning_rate": 1.6188053949903662e-05,
+      "loss": 0.3543,
+      "step": 5000
+    },
+    {
+      "epoch": 0.6393044367727913,
+      "eval_loss": 0.3379935026168823,
+      "eval_runtime": 1120.3014,
+      "eval_samples_per_second": 3.096,
+      "eval_steps_per_second": 0.387,
+      "eval_wer": 0.2361289200878013,
+      "step": 5000
+    },
+    {
+      "epoch": 0.7032348804500703,
+      "grad_norm": 5.27606725692749,
+      "learning_rate": 1.5802697495183046e-05,
+      "loss": 0.34,
+      "step": 5500
+    },
+    {
+      "epoch": 0.7671653241273494,
+      "grad_norm": 8.176605224609375,
+      "learning_rate": 1.541734104046243e-05,
+      "loss": 0.3273,
+      "step": 6000
+    },
+    {
+      "epoch": 0.8310957678046286,
+      "grad_norm": 4.268400192260742,
+      "learning_rate": 1.5032755298651254e-05,
+      "loss": 0.3305,
+      "step": 6500
+    },
+    {
+      "epoch": 0.8950262114819076,
+      "grad_norm": 7.465890884399414,
+      "learning_rate": 1.4647398843930638e-05,
+      "loss": 0.318,
+      "step": 7000
+    },
+    {
+      "epoch": 0.9589566551591868,
+      "grad_norm": 7.091923236846924,
+      "learning_rate": 1.426204238921002e-05,
+      "loss": 0.2965,
+      "step": 7500
+    },
+    {
+      "epoch": 1.022887098836466,
+      "grad_norm": 5.541090965270996,
+      "learning_rate": 1.3878227360308287e-05,
+      "loss": 0.2865,
+      "step": 8000
+    },
+    {
+      "epoch": 1.086817542513745,
+      "grad_norm": 5.313994407653809,
+      "learning_rate": 1.349287090558767e-05,
+      "loss": 0.2109,
+      "step": 8500
+    },
+    {
+      "epoch": 1.1507479861910241,
+      "grad_norm": 4.2993621826171875,
+      "learning_rate": 1.3107514450867053e-05,
+      "loss": 0.2191,
+      "step": 9000
+    },
+    {
+      "epoch": 1.2146784298683033,
+      "grad_norm": 5.328224182128906,
+      "learning_rate": 1.2722157996146436e-05,
+      "loss": 0.2196,
+      "step": 9500
+    },
+    {
+      "epoch": 1.2786088735455823,
+      "grad_norm": 5.929138660430908,
+      "learning_rate": 1.2336801541425821e-05,
+      "loss": 0.2259,
+      "step": 10000
+    },
+    {
+      "epoch": 1.2786088735455823,
+      "eval_loss": 0.2893017828464508,
+      "eval_runtime": 1112.5698,
+      "eval_samples_per_second": 3.117,
+      "eval_steps_per_second": 0.39,
+      "eval_wer": 0.2021819920457262,
+      "step": 10000
+    },
+    {
+      "epoch": 1.3425393172228615,
+      "grad_norm": 4.919782638549805,
+      "learning_rate": 1.1951445086705203e-05,
+      "loss": 0.2224,
+      "step": 10500
+    },
+    {
+      "epoch": 1.4064697609001406,
+      "grad_norm": 5.959251403808594,
+      "learning_rate": 1.1566088631984587e-05,
+      "loss": 0.2125,
+      "step": 11000
+    },
+    {
+      "epoch": 1.4704002045774198,
+      "grad_norm": 4.974765777587891,
+      "learning_rate": 1.118073217726397e-05,
+      "loss": 0.2074,
+      "step": 11500
+    },
+    {
+      "epoch": 1.534330648254699,
+      "grad_norm": 5.756904125213623,
+      "learning_rate": 1.0795375722543353e-05,
+      "loss": 0.2089,
+      "step": 12000
+    },
+    {
+      "epoch": 1.598261091931978,
+      "grad_norm": 3.7231180667877197,
+      "learning_rate": 1.0410019267822737e-05,
+      "loss": 0.215,
+      "step": 12500
+    },
+    {
+      "epoch": 1.6621915356092571,
+      "grad_norm": 4.505956172943115,
+      "learning_rate": 1.0024662813102119e-05,
+      "loss": 0.204,
+      "step": 13000
+    },
+    {
+      "epoch": 1.7261219792865363,
+      "grad_norm": 2.797830820083618,
+      "learning_rate": 9.639306358381504e-06,
+      "loss": 0.1983,
+      "step": 13500
+    },
+    {
+      "epoch": 1.7900524229638153,
+      "grad_norm": 6.62880802154541,
+      "learning_rate": 9.254720616570328e-06,
+      "loss": 0.2004,
+      "step": 14000
+    },
+    {
+      "epoch": 1.8539828666410945,
+      "grad_norm": 4.8896284103393555,
+      "learning_rate": 8.869364161849712e-06,
+      "loss": 0.2023,
+      "step": 14500
+    },
+    {
+      "epoch": 1.9179133103183736,
+      "grad_norm": 4.615833759307861,
+      "learning_rate": 8.484007707129094e-06,
+      "loss": 0.1938,
+      "step": 15000
+    },
+    {
+      "epoch": 1.9179133103183736,
+      "eval_loss": 0.2596728801727295,
+      "eval_runtime": 1104.7223,
+      "eval_samples_per_second": 3.139,
+      "eval_steps_per_second": 0.393,
+      "eval_wer": 0.1790146263012627,
+      "step": 15000
+    },
+    {
+      "epoch": 1.9818437539956526,
+      "grad_norm": 2.99739408493042,
+      "learning_rate": 8.098651252408478e-06,
+      "loss": 0.1871,
+      "step": 15500
+    },
+    {
+      "epoch": 2.045774197672932,
+      "grad_norm": 1.4894932508468628,
+      "learning_rate": 7.713294797687862e-06,
+      "loss": 0.1419,
+      "step": 16000
+    },
+    {
+      "epoch": 2.109704641350211,
+      "grad_norm": 3.1801295280456543,
+      "learning_rate": 7.327938342967246e-06,
+      "loss": 0.1193,
+      "step": 16500
+    },
+    {
+      "epoch": 2.17363508502749,
+      "grad_norm": 3.464812994003296,
+      "learning_rate": 6.942581888246629e-06,
+      "loss": 0.1196,
+      "step": 17000
+    },
+    {
+      "epoch": 2.2375655287047693,
+      "grad_norm": 2.4718642234802246,
+      "learning_rate": 6.5572254335260125e-06,
+      "loss": 0.1173,
+      "step": 17500
+    },
+    {
+      "epoch": 2.3014959723820483,
+      "grad_norm": 4.073084354400635,
+      "learning_rate": 6.172639691714837e-06,
+      "loss": 0.1179,
+      "step": 18000
+    },
+    {
+      "epoch": 2.3654264160593277,
+      "grad_norm": 2.377967119216919,
+      "learning_rate": 5.78728323699422e-06,
+      "loss": 0.1158,
+      "step": 18500
+    },
+    {
+      "epoch": 2.4293568597366066,
+      "grad_norm": 6.478204250335693,
+      "learning_rate": 5.401926782273603e-06,
+      "loss": 0.1229,
+      "step": 19000
+    },
+    {
+      "epoch": 2.4932873034138856,
+      "grad_norm": 1.8411247730255127,
+      "learning_rate": 5.016570327552986e-06,
+      "loss": 0.1144,
+      "step": 19500
+    },
+    {
+      "epoch": 2.5572177470911646,
+      "grad_norm": 3.3542532920837402,
+      "learning_rate": 4.631984585741811e-06,
+      "loss": 0.1119,
+      "step": 20000
+    },
+    {
+      "epoch": 2.5572177470911646,
+      "eval_loss": 0.25722527503967285,
+      "eval_runtime": 1107.3303,
+      "eval_samples_per_second": 3.132,
+      "eval_steps_per_second": 0.392,
+      "eval_wer": 0.16938691239432335,
+      "step": 20000
+    },
+    {
+      "epoch": 2.621148190768444,
+      "grad_norm": 4.66868257522583,
+      "learning_rate": 4.246628131021195e-06,
+      "loss": 0.1126,
+      "step": 20500
+    },
+    {
+      "epoch": 2.685078634445723,
+      "grad_norm": 5.47554874420166,
+      "learning_rate": 3.861271676300578e-06,
+      "loss": 0.1171,
+      "step": 21000
+    },
+    {
+      "epoch": 2.7490090781230023,
+      "grad_norm": 3.905674695968628,
+      "learning_rate": 3.475915221579962e-06,
+      "loss": 0.1158,
+      "step": 21500
+    },
+    {
+      "epoch": 2.8129395218002813,
+      "grad_norm": 3.0056424140930176,
+      "learning_rate": 3.0913294797687864e-06,
+      "loss": 0.1133,
+      "step": 22000
+    },
+    {
+      "epoch": 2.8768699654775602,
+      "grad_norm": 1.467344880104065,
+      "learning_rate": 2.70597302504817e-06,
+      "loss": 0.1085,
+      "step": 22500
+    },
+    {
+      "epoch": 2.9408004091548396,
+      "grad_norm": 1.9786620140075684,
+      "learning_rate": 2.3213872832369944e-06,
+      "loss": 0.1084,
+      "step": 23000
+    },
+    {
+      "epoch": 3.0047308528321186,
+      "grad_norm": 3.245360851287842,
+      "learning_rate": 1.9360308285163777e-06,
+      "loss": 0.102,
+      "step": 23500
+    },
+    {
+      "epoch": 3.0686612965093976,
+      "grad_norm": 0.7349840998649597,
+      "learning_rate": 1.5506743737957611e-06,
+      "loss": 0.063,
+      "step": 24000
+    },
+    {
+      "epoch": 3.132591740186677,
+      "grad_norm": 1.5207897424697876,
+      "learning_rate": 1.1653179190751445e-06,
+      "loss": 0.0616,
+      "step": 24500
+    },
+    {
+      "epoch": 3.196522183863956,
+      "grad_norm": 3.616459846496582,
+      "learning_rate": 7.799614643545279e-07,
+      "loss": 0.0632,
+      "step": 25000
+    },
+    {
+      "epoch": 3.196522183863956,
+      "eval_loss": 0.2614915072917938,
+      "eval_runtime": 1121.1739,
+      "eval_samples_per_second": 3.093,
+      "eval_steps_per_second": 0.387,
+      "eval_wer": 0.16601829917631974,
+      "step": 25000
+    },
+    {
+      "epoch": 3.2604526275412353,
+      "grad_norm": 1.1991757154464722,
+      "learning_rate": 3.946050096339114e-07,
+      "loss": 0.0598,
+      "step": 25500
+    },
+    {
+      "epoch": 3.3243830712185143,
+      "grad_norm": 3.6829333305358887,
+      "learning_rate": 9.248554913294799e-09,
+      "loss": 0.0593,
+      "step": 26000
+    },
+    {
+      "epoch": 3.3243830712185143,
+      "step": 26000,
+      "total_flos": 6.002316599795712e+19,
+      "train_loss": 0.2320793916262113,
+      "train_runtime": 50624.8997,
+      "train_samples_per_second": 4.109,
+      "train_steps_per_second": 0.514
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 26000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 5000,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 1
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 6.002316599795712e+19,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}
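The `log_history` array above carries every logged training and evaluation record. A short sketch of how one might pull the per-checkpoint WER out of it locally (the file path is a placeholder for wherever the checkpoint's trainer_state.json is saved):

```python
import json

# Placeholder path: point this at the trainer_state.json saved with the checkpoint.
with open("trainer_state.json") as f:
    state = json.load(f)

# Evaluation records are the log_history entries that carry an "eval_wer" key.
for rec in state["log_history"]:
    if "eval_wer" in rec:
        print(f'step {rec["step"]:>6}: eval_loss={rec["eval_loss"]:.4f}  eval_wer={rec["eval_wer"]:.4f}')
```

With the values above this prints the five evaluation points, with WER falling from 0.2361 at step 5000 to 0.1660 at step 25000; the best checkpoint by `eval_loss` is step 20000 (WER 0.1694), which is the checkpoint reported in the model card.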