csikasote committed
Commit 98d8745 · verified · 1 Parent(s): 07b5325

End of training
README.md CHANGED
@@ -4,11 +4,23 @@ license: apache-2.0
 base_model: openai/whisper-medium
 tags:
 - generated_from_trainer
+datasets:
+- bigcgen
 metrics:
 - wer
 model-index:
 - name: whisper-medium-bigcgen-baseline-42
-  results: []
+  results:
+  - task:
+      name: Automatic Speech Recognition
+      type: automatic-speech-recognition
+    dataset:
+      name: bigcgen
+      type: bigcgen
+    metrics:
+    - name: Wer
+      type: wer
+      value: 0.526129108536297
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -16,10 +28,10 @@ should probably proofread and complete it, then remove this comment. -->
 
 # whisper-medium-bigcgen-baseline-42
 
-This model is a fine-tuned version of [openai/whisper-medium](https://huggingface.co/openai/whisper-medium) on an unknown dataset.
+This model is a fine-tuned version of [openai/whisper-medium](https://huggingface.co/openai/whisper-medium) on the bigcgen dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.7934
-- Wer: 0.4812
+- Loss: 0.6970
+- Wer: 0.5261
 
 ## Model description
 
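The updated card stops short of a usage snippet; below is a minimal inference sketch. The Hub repo id `csikasote/whisper-medium-bigcgen-baseline-42` is inferred from the committer and model name, and `sample.wav` is a placeholder audio path, so treat both as assumptions.

```python
# Minimal sketch: load the fine-tuned checkpoint for speech recognition.
# Repo id and audio path are assumptions, not taken from the model card.
from transformers import pipeline

asr = pipeline(
    "automatic-speech-recognition",
    model="csikasote/whisper-medium-bigcgen-baseline-42",
)
print(asr("sample.wav")["text"])  # transcription of the placeholder audio file
```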
all_results.json ADDED
@@ -0,0 +1,15 @@
+{
+    "epoch": 4.878718535469107,
+    "eval_loss": 0.6969724297523499,
+    "eval_runtime": 285.0097,
+    "eval_samples": 456,
+    "eval_samples_per_second": 1.6,
+    "eval_steps_per_second": 0.8,
+    "eval_wer": 0.526129108536297,
+    "total_flos": 1.305149555736576e+19,
+    "train_loss": 0.5828571186959743,
+    "train_runtime": 5664.3172,
+    "train_samples": 2621,
+    "train_samples_per_second": 7.062,
+    "train_steps_per_second": 0.883
+}
eval_results.json ADDED
@@ -0,0 +1,9 @@
+{
+    "epoch": 4.878718535469107,
+    "eval_loss": 0.6969724297523499,
+    "eval_runtime": 285.0097,
+    "eval_samples": 456,
+    "eval_samples_per_second": 1.6,
+    "eval_steps_per_second": 0.8,
+    "eval_wer": 0.526129108536297
+}
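The `eval_wer` value is a word error rate on the 0–1 scale (0.5261 ≈ 52.6% WER). A minimal sketch of how such a score is typically computed with the `evaluate` library; the strings below are illustrative placeholders, not transcripts from this evaluation run.

```python
# Word error rate = (substitutions + insertions + deletions) / reference words.
import evaluate

wer_metric = evaluate.load("wer")
references = ["this is a test"]        # hypothetical reference transcript
predictions = ["this is the test"]     # hypothetical model output (one substitution)
print(wer_metric.compute(references=references, predictions=predictions))  # 0.25
```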
runs/Aug09_10-34-49_srvrocgpu011.uct.ac.za/events.out.tfevents.1754734642.srvrocgpu011.uct.ac.za.3063412.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e351461512fcb9d6576b7b938a636cdd95283a9ef67effaaef60953e955b8efb
+size 406
train_results.json ADDED
@@ -0,0 +1,9 @@
+{
+    "epoch": 4.878718535469107,
+    "total_flos": 1.305149555736576e+19,
+    "train_loss": 0.5828571186959743,
+    "train_runtime": 5664.3172,
+    "train_samples": 2621,
+    "train_samples_per_second": 7.062,
+    "train_steps_per_second": 0.883
+}
trainer_state.json ADDED
@@ -0,0 +1,572 @@
+{
+  "best_global_step": 800,
+  "best_metric": 0.6969724297523499,
+  "best_model_checkpoint": "/scratch/skscla001/experiments/datasets/results/whisper-medium-bigcgen-baseline-42/checkpoint-800",
+  "epoch": 4.878718535469107,
+  "eval_steps": 200,
+  "global_step": 1600,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.07627765064836003,
+      "grad_norm": 32.69679260253906,
+      "learning_rate": 4.2000000000000006e-07,
+      "loss": 3.3916,
+      "step": 25
+    },
+    {
+      "epoch": 0.15255530129672007,
+      "grad_norm": 27.17233657836914,
+      "learning_rate": 9.200000000000001e-07,
+      "loss": 2.7163,
+      "step": 50
+    },
+    {
+      "epoch": 0.2288329519450801,
+      "grad_norm": 22.44352912902832,
+      "learning_rate": 1.42e-06,
+      "loss": 2.1063,
+      "step": 75
+    },
+    {
+      "epoch": 0.30511060259344014,
+      "grad_norm": 17.82234764099121,
+      "learning_rate": 1.9200000000000003e-06,
+      "loss": 1.5258,
+      "step": 100
+    },
+    {
+      "epoch": 0.38138825324180015,
+      "grad_norm": 16.569429397583008,
+      "learning_rate": 2.42e-06,
+      "loss": 1.2767,
+      "step": 125
+    },
+    {
+      "epoch": 0.4576659038901602,
+      "grad_norm": 18.720935821533203,
+      "learning_rate": 2.9e-06,
+      "loss": 1.1612,
+      "step": 150
+    },
+    {
+      "epoch": 0.5339435545385202,
+      "grad_norm": 18.467731475830078,
+      "learning_rate": 3.4000000000000005e-06,
+      "loss": 1.0317,
+      "step": 175
+    },
+    {
+      "epoch": 0.6102212051868803,
+      "grad_norm": 9.183775901794434,
+      "learning_rate": 3.900000000000001e-06,
+      "loss": 1.0745,
+      "step": 200
+    },
+    {
+      "epoch": 0.6102212051868803,
+      "eval_loss": 0.9366264939308167,
+      "eval_runtime": 278.3426,
+      "eval_samples_per_second": 1.638,
+      "eval_steps_per_second": 0.819,
+      "eval_wer": 0.6486166942539607,
+      "step": 200
+    },
+    {
+      "epoch": 0.6864988558352403,
+      "grad_norm": 20.13412857055664,
+      "learning_rate": 4.4e-06,
+      "loss": 1.0986,
+      "step": 225
+    },
+    {
+      "epoch": 0.7627765064836003,
+      "grad_norm": 17.606948852539062,
+      "learning_rate": 4.9000000000000005e-06,
+      "loss": 0.8614,
+      "step": 250
+    },
+    {
+      "epoch": 0.8390541571319603,
+      "grad_norm": 13.448685646057129,
+      "learning_rate": 5.400000000000001e-06,
+      "loss": 0.9057,
+      "step": 275
+    },
+    {
+      "epoch": 0.9153318077803204,
+      "grad_norm": 18.768592834472656,
+      "learning_rate": 5.9e-06,
+      "loss": 0.9076,
+      "step": 300
+    },
+    {
+      "epoch": 0.9916094584286804,
+      "grad_norm": 12.053692817687988,
+      "learning_rate": 6.4000000000000006e-06,
+      "loss": 0.7214,
+      "step": 325
+    },
+    {
+      "epoch": 1.0671243325705568,
+      "grad_norm": 13.019783020019531,
+      "learning_rate": 6.9e-06,
+      "loss": 0.7012,
+      "step": 350
+    },
+    {
+      "epoch": 1.1434019832189168,
+      "grad_norm": 16.344667434692383,
+      "learning_rate": 7.4e-06,
+      "loss": 0.7199,
+      "step": 375
+    },
+    {
+      "epoch": 1.2196796338672768,
+      "grad_norm": 13.561891555786133,
+      "learning_rate": 7.9e-06,
+      "loss": 0.6532,
+      "step": 400
+    },
+    {
+      "epoch": 1.2196796338672768,
+      "eval_loss": 0.7690292000770569,
+      "eval_runtime": 283.1267,
+      "eval_samples_per_second": 1.611,
+      "eval_steps_per_second": 0.805,
+      "eval_wer": 0.5467013478363679,
+      "step": 400
+    },
+    {
+      "epoch": 1.2959572845156369,
+      "grad_norm": 15.391003608703613,
+      "learning_rate": 8.400000000000001e-06,
+      "loss": 0.616,
+      "step": 425
+    },
+    {
+      "epoch": 1.372234935163997,
+      "grad_norm": 14.098180770874023,
+      "learning_rate": 8.900000000000001e-06,
+      "loss": 0.6977,
+      "step": 450
+    },
+    {
+      "epoch": 1.4485125858123569,
+      "grad_norm": 12.179559707641602,
+      "learning_rate": 9.4e-06,
+      "loss": 0.6716,
+      "step": 475
+    },
+    {
+      "epoch": 1.524790236460717,
+      "grad_norm": 18.046316146850586,
+      "learning_rate": 9.9e-06,
+      "loss": 0.6829,
+      "step": 500
+    },
+    {
+      "epoch": 1.6010678871090769,
+      "grad_norm": 12.71342658996582,
+      "learning_rate": 9.955555555555556e-06,
+      "loss": 0.7193,
+      "step": 525
+    },
+    {
+      "epoch": 1.677345537757437,
+      "grad_norm": 11.328106880187988,
+      "learning_rate": 9.9e-06,
+      "loss": 0.6129,
+      "step": 550
+    },
+    {
+      "epoch": 1.7536231884057971,
+      "grad_norm": 11.869524002075195,
+      "learning_rate": 9.844444444444446e-06,
+      "loss": 0.6352,
+      "step": 575
+    },
+    {
+      "epoch": 1.8299008390541571,
+      "grad_norm": 12.608686447143555,
+      "learning_rate": 9.78888888888889e-06,
+      "loss": 0.6347,
+      "step": 600
+    },
+    {
+      "epoch": 1.8299008390541571,
+      "eval_loss": 0.7060141563415527,
+      "eval_runtime": 285.6618,
+      "eval_samples_per_second": 1.596,
+      "eval_steps_per_second": 0.798,
+      "eval_wer": 0.5128872073776306,
+      "step": 600
+    },
+    {
+      "epoch": 1.9061784897025171,
+      "grad_norm": 14.89091968536377,
+      "learning_rate": 9.733333333333334e-06,
+      "loss": 0.6794,
+      "step": 625
+    },
+    {
+      "epoch": 1.9824561403508771,
+      "grad_norm": 17.0143985748291,
+      "learning_rate": 9.677777777777778e-06,
+      "loss": 0.6517,
+      "step": 650
+    },
+    {
+      "epoch": 2.0579710144927534,
+      "grad_norm": 11.61669921875,
+      "learning_rate": 9.622222222222222e-06,
+      "loss": 0.4539,
+      "step": 675
+    },
+    {
+      "epoch": 2.1342486651411137,
+      "grad_norm": 6.573554515838623,
+      "learning_rate": 9.566666666666668e-06,
+      "loss": 0.3674,
+      "step": 700
+    },
+    {
+      "epoch": 2.2105263157894735,
+      "grad_norm": 11.21182918548584,
+      "learning_rate": 9.511111111111112e-06,
+      "loss": 0.3828,
+      "step": 725
+    },
+    {
+      "epoch": 2.2868039664378337,
+      "grad_norm": 14.237340927124023,
+      "learning_rate": 9.455555555555557e-06,
+      "loss": 0.3999,
+      "step": 750
+    },
+    {
+      "epoch": 2.363081617086194,
+      "grad_norm": 9.172303199768066,
+      "learning_rate": 9.4e-06,
+      "loss": 0.4579,
+      "step": 775
+    },
+    {
+      "epoch": 2.4393592677345537,
+      "grad_norm": 10.187737464904785,
+      "learning_rate": 9.344444444444446e-06,
+      "loss": 0.4066,
+      "step": 800
+    },
+    {
+      "epoch": 2.4393592677345537,
+      "eval_loss": 0.6969724297523499,
+      "eval_runtime": 289.906,
+      "eval_samples_per_second": 1.573,
+      "eval_steps_per_second": 0.786,
+      "eval_wer": 0.526129108536297,
+      "step": 800
+    },
+    {
+      "epoch": 2.515636918382914,
+      "grad_norm": 11.924391746520996,
+      "learning_rate": 9.28888888888889e-06,
+      "loss": 0.382,
+      "step": 825
+    },
+    {
+      "epoch": 2.5919145690312737,
+      "grad_norm": 10.492657661437988,
+      "learning_rate": 9.233333333333334e-06,
+      "loss": 0.3796,
+      "step": 850
+    },
+    {
+      "epoch": 2.668192219679634,
+      "grad_norm": 9.099035263061523,
+      "learning_rate": 9.17777777777778e-06,
+      "loss": 0.5154,
+      "step": 875
+    },
+    {
+      "epoch": 2.744469870327994,
+      "grad_norm": 11.572857856750488,
+      "learning_rate": 9.122222222222223e-06,
+      "loss": 0.4487,
+      "step": 900
+    },
+    {
+      "epoch": 2.820747520976354,
+      "grad_norm": 13.59435749053955,
+      "learning_rate": 9.066666666666667e-06,
+      "loss": 0.4478,
+      "step": 925
+    },
+    {
+      "epoch": 2.8970251716247137,
+      "grad_norm": 11.392141342163086,
+      "learning_rate": 9.011111111111111e-06,
+      "loss": 0.352,
+      "step": 950
+    },
+    {
+      "epoch": 2.973302822273074,
+      "grad_norm": 8.065723419189453,
+      "learning_rate": 8.955555555555555e-06,
+      "loss": 0.3986,
+      "step": 975
+    },
+    {
+      "epoch": 3.0488176964149503,
+      "grad_norm": 7.743978977203369,
+      "learning_rate": 8.900000000000001e-06,
+      "loss": 0.2542,
+      "step": 1000
+    },
+    {
+      "epoch": 3.0488176964149503,
+      "eval_loss": 0.7139758467674255,
+      "eval_runtime": 280.6617,
+      "eval_samples_per_second": 1.625,
+      "eval_steps_per_second": 0.812,
+      "eval_wer": 0.5034287065500118,
+      "step": 1000
+    },
+    {
+      "epoch": 3.1250953470633105,
+      "grad_norm": 6.686219215393066,
+      "learning_rate": 8.844444444444445e-06,
+      "loss": 0.1859,
+      "step": 1025
+    },
+    {
+      "epoch": 3.2013729977116703,
+      "grad_norm": 8.531457901000977,
+      "learning_rate": 8.788888888888891e-06,
+      "loss": 0.2228,
+      "step": 1050
+    },
+    {
+      "epoch": 3.2776506483600305,
+      "grad_norm": 8.079030990600586,
+      "learning_rate": 8.733333333333333e-06,
+      "loss": 0.233,
+      "step": 1075
+    },
+    {
+      "epoch": 3.3539282990083907,
+      "grad_norm": 7.210234642028809,
+      "learning_rate": 8.677777777777779e-06,
+      "loss": 0.278,
+      "step": 1100
+    },
+    {
+      "epoch": 3.4302059496567505,
+      "grad_norm": 9.321666717529297,
+      "learning_rate": 8.622222222222223e-06,
+      "loss": 0.2022,
+      "step": 1125
+    },
+    {
+      "epoch": 3.5064836003051107,
+      "grad_norm": 8.422845840454102,
+      "learning_rate": 8.566666666666667e-06,
+      "loss": 0.2462,
+      "step": 1150
+    },
+    {
+      "epoch": 3.5827612509534705,
+      "grad_norm": 7.176828384399414,
+      "learning_rate": 8.511111111111113e-06,
+      "loss": 0.2568,
+      "step": 1175
+    },
+    {
+      "epoch": 3.6590389016018308,
+      "grad_norm": 7.077821731567383,
+      "learning_rate": 8.455555555555555e-06,
+      "loss": 0.252,
+      "step": 1200
+    },
+    {
+      "epoch": 3.6590389016018308,
+      "eval_loss": 0.7221043705940247,
+      "eval_runtime": 282.4738,
+      "eval_samples_per_second": 1.614,
+      "eval_steps_per_second": 0.807,
+      "eval_wer": 0.4833293922913218,
+      "step": 1200
+    },
+    {
+      "epoch": 3.7353165522501905,
+      "grad_norm": 8.898653984069824,
+      "learning_rate": 8.400000000000001e-06,
+      "loss": 0.2351,
+      "step": 1225
+    },
+    {
+      "epoch": 3.8115942028985508,
+      "grad_norm": 8.900397300720215,
+      "learning_rate": 8.344444444444445e-06,
+      "loss": 0.2656,
+      "step": 1250
+    },
+    {
+      "epoch": 3.887871853546911,
+      "grad_norm": 6.858781814575195,
+      "learning_rate": 8.288888888888889e-06,
+      "loss": 0.2253,
+      "step": 1275
+    },
+    {
+      "epoch": 3.964149504195271,
+      "grad_norm": 5.616495609283447,
+      "learning_rate": 8.233333333333335e-06,
+      "loss": 0.2518,
+      "step": 1300
+    },
+    {
+      "epoch": 4.0396643783371475,
+      "grad_norm": 2.58622407913208,
+      "learning_rate": 8.177777777777779e-06,
+      "loss": 0.1329,
+      "step": 1325
+    },
+    {
+      "epoch": 4.115942028985507,
+      "grad_norm": 4.7402544021606445,
+      "learning_rate": 8.122222222222223e-06,
+      "loss": 0.0939,
+      "step": 1350
+    },
+    {
+      "epoch": 4.192219679633867,
+      "grad_norm": 7.061638355255127,
+      "learning_rate": 8.066666666666667e-06,
+      "loss": 0.1122,
+      "step": 1375
+    },
+    {
+      "epoch": 4.268497330282227,
+      "grad_norm": 3.740872383117676,
+      "learning_rate": 8.011111111111113e-06,
+      "loss": 0.137,
+      "step": 1400
+    },
+    {
+      "epoch": 4.268497330282227,
+      "eval_loss": 0.757293701171875,
+      "eval_runtime": 278.9207,
+      "eval_samples_per_second": 1.635,
+      "eval_steps_per_second": 0.817,
+      "eval_wer": 0.4878221801844408,
+      "step": 1400
+    },
+    {
+      "epoch": 4.344774980930588,
+      "grad_norm": 4.499791622161865,
+      "learning_rate": 7.955555555555557e-06,
+      "loss": 0.0954,
+      "step": 1425
+    },
+    {
+      "epoch": 4.421052631578947,
+      "grad_norm": 4.751846790313721,
+      "learning_rate": 7.9e-06,
+      "loss": 0.1071,
+      "step": 1450
+    },
+    {
+      "epoch": 4.497330282227307,
+      "grad_norm": 9.211798667907715,
+      "learning_rate": 7.844444444444446e-06,
+      "loss": 0.102,
+      "step": 1475
+    },
+    {
+      "epoch": 4.573607932875667,
+      "grad_norm": 6.26869535446167,
+      "learning_rate": 7.788888888888889e-06,
+      "loss": 0.0968,
+      "step": 1500
+    },
+    {
+      "epoch": 4.649885583524028,
+      "grad_norm": 6.295158386230469,
+      "learning_rate": 7.733333333333334e-06,
+      "loss": 0.1196,
+      "step": 1525
+    },
+    {
+      "epoch": 4.726163234172388,
+      "grad_norm": 9.435331344604492,
+      "learning_rate": 7.677777777777778e-06,
+      "loss": 0.1279,
+      "step": 1550
+    },
+    {
+      "epoch": 4.802440884820747,
+      "grad_norm": 3.417738914489746,
+      "learning_rate": 7.622222222222223e-06,
+      "loss": 0.1082,
+      "step": 1575
+    },
+    {
+      "epoch": 4.878718535469107,
+      "grad_norm": 7.170241832733154,
+      "learning_rate": 7.566666666666667e-06,
+      "loss": 0.114,
+      "step": 1600
+    },
+    {
+      "epoch": 4.878718535469107,
+      "eval_loss": 0.793424129486084,
+      "eval_runtime": 277.9238,
+      "eval_samples_per_second": 1.641,
+      "eval_steps_per_second": 0.82,
+      "eval_wer": 0.4812012296051076,
+      "step": 1600
+    },
+    {
+      "epoch": 4.878718535469107,
+      "step": 1600,
+      "total_flos": 1.305149555736576e+19,
+      "train_loss": 0.5828571186959743,
+      "train_runtime": 5664.3172,
+      "train_samples_per_second": 7.062,
+      "train_steps_per_second": 0.883
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 5000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 16,
+  "save_steps": 200,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 4,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 4
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.305149555736576e+19,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
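The trainer state above implies the run evaluated and saved every 200 steps, logged every 25 steps, capped training at 5000 steps with a per-device batch size of 2, and stopped early after 4 evaluations without improvement (the best checkpoint, step 800, has the lowest `eval_loss`). A hedged sketch of equivalent `transformers` settings; only the values recorded in trainer_state.json are taken from the file, everything else (output path, exact argument spelling) is an assumption.

```python
# Hedged reconstruction of the configuration implied by trainer_state.json.
from transformers import EarlyStoppingCallback, Seq2SeqTrainingArguments

training_args = Seq2SeqTrainingArguments(
    output_dir="whisper-medium-bigcgen-baseline-42",  # placeholder output path
    per_device_train_batch_size=2,   # "train_batch_size": 2
    max_steps=5000,                  # "max_steps": 5000
    logging_steps=25,                # "logging_steps": 25
    eval_strategy="steps",           # spelled evaluation_strategy in older transformers releases
    eval_steps=200,                  # "eval_steps": 200
    save_steps=200,                  # "save_steps": 200
    load_best_model_at_end=True,     # best checkpoint tracked by eval_loss ("best_metric" above)
    metric_for_best_model="eval_loss",
    greater_is_better=False,
)

# Matches the EarlyStoppingCallback state recorded above: training stopped
# after 4 consecutive evaluations without improvement over the best eval_loss.
early_stopping = EarlyStoppingCallback(
    early_stopping_patience=4,
    early_stopping_threshold=0.0,
)
```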