harisali9211 committed on
Commit 904d7d5 · verified · 1 Parent(s): 33a17e3

All Dunn!!!

Files changed (3)
  1. all_results.json +8 -0
  2. train_results.json +8 -0
  3. trainer_state.json +473 -0
all_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 2.0,
+ "total_flos": 1.9906356553640313e+19,
+ "train_loss": 0.47363961749354666,
+ "train_runtime": 3260.3696,
+ "train_samples_per_second": 4.125,
+ "train_steps_per_second": 0.516
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 2.0,
+ "total_flos": 1.9906356553640313e+19,
+ "train_loss": 0.47363961749354666,
+ "train_runtime": 3260.3696,
+ "train_samples_per_second": 4.125,
+ "train_steps_per_second": 0.516
+ }
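
Note: all_results.json and train_results.json in this commit carry the same training summary. Files like these are typically written by the standard pattern used in the transformers example scripts, sketched below; the Trainer construction (model, arguments, datasets) is not part of this commit and is assumed here.

```python
# Minimal sketch (not the exact script behind this commit): the usual
# transformers example-script pattern that emits train_results.json,
# all_results.json, and trainer_state.json in the output directory.
from transformers import Trainer

def train_and_save(trainer: Trainer) -> None:
    train_result = trainer.train()
    metrics = train_result.metrics            # epoch, train_loss, train_runtime, ...

    trainer.save_model()                      # model weights + config
    trainer.log_metrics("train", metrics)     # print the summary to the console
    trainer.save_metrics("train", metrics)    # writes train_results.json and all_results.json
    trainer.save_state()                      # writes trainer_state.json (log_history, callbacks)
```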
trainer_state.json ADDED
@@ -0,0 +1,473 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 2.0,
+ "eval_steps": 500,
+ "global_step": 1682,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.0011890606420927466,
+ "grad_norm": 115.31934356689453,
+ "learning_rate": 4.9970273483947685e-05,
+ "loss": 9.3546,
+ "step": 1
+ },
+ {
+ "epoch": 0.034482758620689655,
+ "grad_norm": 18.599306106567383,
+ "learning_rate": 4.913793103448276e-05,
+ "loss": 2.4091,
+ "step": 29
+ },
+ {
+ "epoch": 0.06896551724137931,
+ "grad_norm": 12.393567085266113,
+ "learning_rate": 4.827586206896552e-05,
+ "loss": 1.1241,
+ "step": 58
+ },
+ {
+ "epoch": 0.10344827586206896,
+ "grad_norm": 50.094051361083984,
+ "learning_rate": 4.741379310344828e-05,
+ "loss": 1.0125,
+ "step": 87
+ },
+ {
+ "epoch": 0.13793103448275862,
+ "grad_norm": 39.95038986206055,
+ "learning_rate": 4.655172413793104e-05,
+ "loss": 0.9273,
+ "step": 116
+ },
+ {
+ "epoch": 0.1724137931034483,
+ "grad_norm": 13.281441688537598,
+ "learning_rate": 4.5689655172413794e-05,
+ "loss": 0.8734,
+ "step": 145
+ },
+ {
+ "epoch": 0.20689655172413793,
+ "grad_norm": 29.820255279541016,
+ "learning_rate": 4.482758620689655e-05,
+ "loss": 0.9182,
+ "step": 174
+ },
+ {
+ "epoch": 0.2413793103448276,
+ "grad_norm": 14.038665771484375,
+ "learning_rate": 4.396551724137931e-05,
+ "loss": 0.9129,
+ "step": 203
+ },
+ {
+ "epoch": 0.27586206896551724,
+ "grad_norm": 13.406801223754883,
+ "learning_rate": 4.3103448275862066e-05,
+ "loss": 0.7973,
+ "step": 232
+ },
+ {
+ "epoch": 0.3103448275862069,
+ "grad_norm": 15.321793556213379,
+ "learning_rate": 4.224137931034483e-05,
+ "loss": 0.8415,
+ "step": 261
+ },
+ {
+ "epoch": 0.3448275862068966,
+ "grad_norm": 27.121288299560547,
+ "learning_rate": 4.1379310344827587e-05,
+ "loss": 0.7995,
+ "step": 290
+ },
+ {
+ "epoch": 0.3793103448275862,
+ "grad_norm": 36.715858459472656,
+ "learning_rate": 4.0517241379310344e-05,
+ "loss": 0.6301,
+ "step": 319
+ },
+ {
+ "epoch": 0.41379310344827586,
+ "grad_norm": 16.650150299072266,
+ "learning_rate": 3.965517241379311e-05,
+ "loss": 0.5675,
+ "step": 348
+ },
+ {
+ "epoch": 0.4482758620689655,
+ "grad_norm": 230.8839569091797,
+ "learning_rate": 3.8793103448275865e-05,
+ "loss": 0.6154,
+ "step": 377
+ },
+ {
+ "epoch": 0.4827586206896552,
+ "grad_norm": 6.414034366607666,
+ "learning_rate": 3.793103448275862e-05,
+ "loss": 0.6439,
+ "step": 406
+ },
+ {
+ "epoch": 0.5172413793103449,
+ "grad_norm": 9.441433906555176,
+ "learning_rate": 3.7068965517241385e-05,
+ "loss": 0.5776,
+ "step": 435
+ },
+ {
+ "epoch": 0.5517241379310345,
+ "grad_norm": 23.88970947265625,
+ "learning_rate": 3.620689655172414e-05,
+ "loss": 0.6126,
+ "step": 464
+ },
+ {
+ "epoch": 0.5862068965517241,
+ "grad_norm": 10.72873592376709,
+ "learning_rate": 3.53448275862069e-05,
+ "loss": 0.529,
+ "step": 493
+ },
+ {
+ "epoch": 0.6206896551724138,
+ "grad_norm": 22.285120010375977,
+ "learning_rate": 3.4482758620689657e-05,
+ "loss": 0.5418,
+ "step": 522
+ },
+ {
+ "epoch": 0.6551724137931034,
+ "grad_norm": 28.53759002685547,
+ "learning_rate": 3.3620689655172414e-05,
+ "loss": 0.4864,
+ "step": 551
+ },
+ {
+ "epoch": 0.6896551724137931,
+ "grad_norm": 15.215431213378906,
+ "learning_rate": 3.275862068965517e-05,
+ "loss": 0.5474,
+ "step": 580
+ },
+ {
+ "epoch": 0.7241379310344828,
+ "grad_norm": 5.726695537567139,
+ "learning_rate": 3.1896551724137935e-05,
+ "loss": 0.4492,
+ "step": 609
+ },
+ {
+ "epoch": 0.7586206896551724,
+ "grad_norm": 20.181598663330078,
+ "learning_rate": 3.103448275862069e-05,
+ "loss": 0.4925,
+ "step": 638
+ },
+ {
+ "epoch": 0.7931034482758621,
+ "grad_norm": 16.79625701904297,
+ "learning_rate": 3.017241379310345e-05,
+ "loss": 0.4405,
+ "step": 667
+ },
+ {
+ "epoch": 0.8275862068965517,
+ "grad_norm": 5.420645236968994,
+ "learning_rate": 2.9310344827586206e-05,
+ "loss": 0.5411,
+ "step": 696
+ },
+ {
+ "epoch": 0.8620689655172413,
+ "grad_norm": 6.218174457550049,
+ "learning_rate": 2.844827586206897e-05,
+ "loss": 0.5516,
+ "step": 725
+ },
+ {
+ "epoch": 0.896551724137931,
+ "grad_norm": 27.71928596496582,
+ "learning_rate": 2.7586206896551727e-05,
+ "loss": 0.4341,
+ "step": 754
+ },
+ {
+ "epoch": 0.9310344827586207,
+ "grad_norm": 31.43971824645996,
+ "learning_rate": 2.672413793103448e-05,
+ "loss": 0.3883,
+ "step": 783
+ },
+ {
+ "epoch": 0.9655172413793104,
+ "grad_norm": 7.160225868225098,
+ "learning_rate": 2.5862068965517244e-05,
+ "loss": 0.4214,
+ "step": 812
+ },
+ {
+ "epoch": 1.0,
+ "grad_norm": 7.139071464538574,
+ "learning_rate": 2.5e-05,
+ "loss": 0.3728,
+ "step": 841
+ },
+ {
+ "epoch": 1.0,
+ "eval_cer": 0.019845431962462048,
+ "eval_loss": 0.2980102300643921,
+ "eval_runtime": 887.0299,
+ "eval_samples_per_second": 1.896,
+ "eval_steps_per_second": 0.238,
+ "step": 841
+ },
+ {
+ "epoch": 1.0344827586206897,
+ "grad_norm": 14.740863800048828,
+ "learning_rate": 2.413793103448276e-05,
+ "loss": 0.3343,
+ "step": 870
+ },
+ {
+ "epoch": 1.0689655172413792,
+ "grad_norm": 22.295618057250977,
+ "learning_rate": 2.327586206896552e-05,
+ "loss": 0.3419,
+ "step": 899
+ },
+ {
+ "epoch": 1.103448275862069,
+ "grad_norm": 2.4551053047180176,
+ "learning_rate": 2.2413793103448276e-05,
+ "loss": 0.3123,
+ "step": 928
+ },
+ {
+ "epoch": 1.1379310344827587,
+ "grad_norm": 7.861540794372559,
+ "learning_rate": 2.1551724137931033e-05,
+ "loss": 0.349,
+ "step": 957
+ },
+ {
+ "epoch": 1.1724137931034484,
+ "grad_norm": 7.037637233734131,
+ "learning_rate": 2.0689655172413793e-05,
+ "loss": 0.2869,
+ "step": 986
+ },
+ {
+ "epoch": 1.206896551724138,
+ "grad_norm": 11.390913009643555,
+ "learning_rate": 1.9827586206896554e-05,
+ "loss": 0.2826,
+ "step": 1015
+ },
+ {
+ "epoch": 1.2413793103448276,
+ "grad_norm": 2.8627371788024902,
+ "learning_rate": 1.896551724137931e-05,
+ "loss": 0.2573,
+ "step": 1044
+ },
+ {
+ "epoch": 1.2758620689655173,
+ "grad_norm": 16.846853256225586,
+ "learning_rate": 1.810344827586207e-05,
+ "loss": 0.2752,
+ "step": 1073
+ },
+ {
+ "epoch": 1.3103448275862069,
+ "grad_norm": 3.9017491340637207,
+ "learning_rate": 1.7241379310344828e-05,
+ "loss": 0.2555,
+ "step": 1102
+ },
+ {
+ "epoch": 1.3448275862068966,
+ "grad_norm": 5.074669361114502,
+ "learning_rate": 1.6379310344827585e-05,
+ "loss": 0.2452,
+ "step": 1131
+ },
+ {
+ "epoch": 1.3793103448275863,
+ "grad_norm": 18.69165802001953,
+ "learning_rate": 1.5517241379310346e-05,
+ "loss": 0.2666,
+ "step": 1160
+ },
+ {
+ "epoch": 1.4137931034482758,
+ "grad_norm": 5.539306640625,
+ "learning_rate": 1.4655172413793103e-05,
+ "loss": 0.2741,
+ "step": 1189
+ },
+ {
+ "epoch": 1.4482758620689655,
+ "grad_norm": 5.758066654205322,
+ "learning_rate": 1.3793103448275863e-05,
+ "loss": 0.2365,
+ "step": 1218
+ },
+ {
+ "epoch": 1.4827586206896552,
+ "grad_norm": 4.248748779296875,
+ "learning_rate": 1.2931034482758622e-05,
+ "loss": 0.2521,
+ "step": 1247
+ },
+ {
+ "epoch": 1.5172413793103448,
+ "grad_norm": 11.129036903381348,
+ "learning_rate": 1.206896551724138e-05,
+ "loss": 0.2257,
+ "step": 1276
+ },
+ {
+ "epoch": 1.5517241379310345,
+ "grad_norm": 2.8656809329986572,
+ "learning_rate": 1.1206896551724138e-05,
+ "loss": 0.2106,
+ "step": 1305
+ },
+ {
+ "epoch": 1.5862068965517242,
+ "grad_norm": 2.041245460510254,
+ "learning_rate": 1.0344827586206897e-05,
+ "loss": 0.227,
+ "step": 1334
+ },
+ {
+ "epoch": 1.6206896551724137,
+ "grad_norm": 1.451377034187317,
+ "learning_rate": 9.482758620689655e-06,
+ "loss": 0.1961,
+ "step": 1363
+ },
+ {
+ "epoch": 1.6551724137931034,
+ "grad_norm": 4.332240104675293,
+ "learning_rate": 8.620689655172414e-06,
+ "loss": 0.195,
+ "step": 1392
+ },
+ {
+ "epoch": 1.6896551724137931,
+ "grad_norm": 5.428757667541504,
+ "learning_rate": 7.758620689655173e-06,
+ "loss": 0.1929,
+ "step": 1421
+ },
+ {
+ "epoch": 1.7241379310344827,
+ "grad_norm": 2.5995798110961914,
+ "learning_rate": 6.896551724137932e-06,
+ "loss": 0.1991,
+ "step": 1450
+ },
+ {
+ "epoch": 1.7586206896551724,
+ "grad_norm": 2.2909860610961914,
+ "learning_rate": 6.03448275862069e-06,
+ "loss": 0.168,
+ "step": 1479
+ },
+ {
+ "epoch": 1.793103448275862,
+ "grad_norm": 2.049956798553467,
+ "learning_rate": 5.172413793103448e-06,
+ "loss": 0.18,
+ "step": 1508
+ },
+ {
+ "epoch": 1.8275862068965516,
+ "grad_norm": 2.291414737701416,
+ "learning_rate": 4.310344827586207e-06,
+ "loss": 0.1642,
+ "step": 1537
+ },
+ {
+ "epoch": 1.8620689655172413,
+ "grad_norm": 6.125259876251221,
+ "learning_rate": 3.448275862068966e-06,
+ "loss": 0.197,
+ "step": 1566
+ },
+ {
+ "epoch": 1.896551724137931,
+ "grad_norm": 2.359379291534424,
+ "learning_rate": 2.586206896551724e-06,
+ "loss": 0.1615,
+ "step": 1595
+ },
+ {
+ "epoch": 1.9310344827586206,
+ "grad_norm": 2.1470141410827637,
+ "learning_rate": 1.724137931034483e-06,
+ "loss": 0.1669,
+ "step": 1624
+ },
+ {
+ "epoch": 1.9655172413793105,
+ "grad_norm": 13.040664672851562,
+ "learning_rate": 8.620689655172415e-07,
+ "loss": 0.1641,
+ "step": 1653
+ },
+ {
+ "epoch": 2.0,
+ "grad_norm": 1.5605217218399048,
+ "learning_rate": 0.0,
+ "loss": 0.1551,
+ "step": 1682
+ },
+ {
+ "epoch": 2.0,
+ "eval_cer": 0.003201766491857577,
+ "eval_loss": 0.17416498064994812,
+ "eval_runtime": 882.6649,
+ "eval_samples_per_second": 1.906,
+ "eval_steps_per_second": 0.239,
+ "step": 1682
+ },
+ {
+ "epoch": 2.0,
+ "step": 1682,
+ "total_flos": 1.9906356553640313e+19,
+ "train_loss": 0.47363961749354666,
+ "train_runtime": 3260.3696,
+ "train_samples_per_second": 4.125,
+ "train_steps_per_second": 0.516
+ }
+ ],
+ "logging_steps": 29,
+ "max_steps": 1682,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 2,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": true
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 1.9906356553640313e+19,
+ "train_batch_size": 8,
+ "trial_name": null,
+ "trial_params": null
+ }
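
The trainer_state.json added above keeps the full log_history, so the loss curve and the per-epoch eval_cer can be recovered without rerunning training. A minimal sketch using only the standard library, assuming the file sits in the current directory:

```python
# Inspect the committed trainer_state.json: print the training loss
# curve and the per-epoch evaluation metrics from log_history.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

train_logs = [e for e in state["log_history"] if "loss" in e]      # logged every 29 steps
eval_logs = [e for e in state["log_history"] if "eval_cer" in e]   # logged once per epoch

for entry in train_logs:
    print(f"step {entry['step']:>4}  loss {entry['loss']:.4f}")

for entry in eval_logs:
    print(f"epoch {entry['epoch']:.1f}  eval_cer {entry['eval_cer']:.4f}  eval_loss {entry['eval_loss']:.4f}")
```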