harisali9211 commited on
Commit
08a3d02
·
verified ·
1 Parent(s): aea78be

All Dunn!!!

Browse files
Files changed (3) hide show
  1. all_results.json +5 -5
  2. train_results.json +5 -5
  3. trainer_state.json +423 -45
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.0,
3
- "total_flos": 7.193998576047882e+17,
4
- "train_loss": 1.5048968147058956,
5
- "train_runtime": 791.7321,
6
- "train_samples_per_second": 0.614,
7
- "train_steps_per_second": 0.154
8
  }
 
1
  {
2
  "epoch": 2.0,
3
+ "total_flos": 1.9906356553640313e+19,
4
+ "train_loss": 0.6271170827069549,
5
+ "train_runtime": 2733.4434,
6
+ "train_samples_per_second": 4.92,
7
+ "train_steps_per_second": 0.615
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.0,
3
- "total_flos": 7.193998576047882e+17,
4
- "train_loss": 1.5048968147058956,
5
- "train_runtime": 791.7321,
6
- "train_samples_per_second": 0.614,
7
- "train_steps_per_second": 0.154
8
  }
 
1
  {
2
  "epoch": 2.0,
3
+ "total_flos": 1.9906356553640313e+19,
4
+ "train_loss": 0.6271170827069549,
5
+ "train_runtime": 2733.4434,
6
+ "train_samples_per_second": 4.92,
7
+ "train_steps_per_second": 0.615
8
  }
trainer_state.json CHANGED
@@ -3,76 +3,454 @@
3
  "best_model_checkpoint": null,
4
  "epoch": 2.0,
5
  "eval_steps": 500,
6
- "global_step": 122,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.01639344262295082,
13
- "grad_norm": 178.06546020507812,
14
- "learning_rate": 4.959016393442623e-05,
15
- "loss": 8.943,
16
  "step": 1
17
  },
18
  {
19
- "epoch": 0.47540983606557374,
20
- "grad_norm": 54.69188690185547,
21
- "learning_rate": 3.8114754098360655e-05,
22
- "loss": 2.2897,
23
  "step": 29
24
  },
25
  {
26
- "epoch": 0.9508196721311475,
27
- "grad_norm": 16.769203186035156,
28
- "learning_rate": 2.6229508196721314e-05,
29
- "loss": 2.0108,
30
  "step": 58
31
  },
32
  {
33
- "epoch": 1.0,
34
- "eval_cer": 0.1530448717948718,
35
- "eval_loss": 1.128391981124878,
36
- "eval_runtime": 125.6559,
37
- "eval_samples_per_second": 0.485,
38
- "eval_steps_per_second": 0.064,
39
- "step": 61
40
- },
41
- {
42
- "epoch": 1.4262295081967213,
43
- "grad_norm": 30.61115837097168,
44
- "learning_rate": 1.4344262295081968e-05,
45
- "loss": 1.1471,
46
  "step": 87
47
  },
48
  {
49
- "epoch": 1.901639344262295,
50
- "grad_norm": 10.20889663696289,
51
- "learning_rate": 2.459016393442623e-06,
52
- "loss": 0.5665,
53
  "step": 116
54
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  {
56
  "epoch": 2.0,
57
- "eval_cer": 0.03125,
58
- "eval_loss": 0.5153284072875977,
59
- "eval_runtime": 107.7401,
60
- "eval_samples_per_second": 0.566,
61
- "eval_steps_per_second": 0.074,
62
- "step": 122
63
  },
64
  {
65
  "epoch": 2.0,
66
- "step": 122,
67
- "total_flos": 7.193998576047882e+17,
68
- "train_loss": 1.5048968147058956,
69
- "train_runtime": 791.7321,
70
- "train_samples_per_second": 0.614,
71
- "train_steps_per_second": 0.154
72
  }
73
  ],
74
  "logging_steps": 29,
75
- "max_steps": 122,
76
  "num_input_tokens_seen": 0,
77
  "num_train_epochs": 2,
78
  "save_steps": 500,
@@ -88,8 +466,8 @@
88
  "attributes": {}
89
  }
90
  },
91
- "total_flos": 7.193998576047882e+17,
92
- "train_batch_size": 4,
93
  "trial_name": null,
94
  "trial_params": null
95
  }
 
3
  "best_model_checkpoint": null,
4
  "epoch": 2.0,
5
  "eval_steps": 500,
6
+ "global_step": 1682,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.0011890606420927466,
13
+ "grad_norm": 128.86566162109375,
14
+ "learning_rate": 4.9970273483947685e-05,
15
+ "loss": 9.559,
16
  "step": 1
17
  },
18
  {
19
+ "epoch": 0.034482758620689655,
20
+ "grad_norm": 23.844635009765625,
21
+ "learning_rate": 4.913793103448276e-05,
22
+ "loss": 2.0043,
23
  "step": 29
24
  },
25
  {
26
+ "epoch": 0.06896551724137931,
27
+ "grad_norm": 43.76908493041992,
28
+ "learning_rate": 4.827586206896552e-05,
29
+ "loss": 1.6394,
30
  "step": 58
31
  },
32
  {
33
+ "epoch": 0.10344827586206896,
34
+ "grad_norm": 112.72496032714844,
35
+ "learning_rate": 4.741379310344828e-05,
36
+ "loss": 1.1869,
 
 
 
 
 
 
 
 
 
37
  "step": 87
38
  },
39
  {
40
+ "epoch": 0.13793103448275862,
41
+ "grad_norm": 57.330135345458984,
42
+ "learning_rate": 4.655172413793104e-05,
43
+ "loss": 1.4015,
44
  "step": 116
45
  },
46
+ {
47
+ "epoch": 0.1724137931034483,
48
+ "grad_norm": 12.192914962768555,
49
+ "learning_rate": 4.5689655172413794e-05,
50
+ "loss": 1.1619,
51
+ "step": 145
52
+ },
53
+ {
54
+ "epoch": 0.20689655172413793,
55
+ "grad_norm": 49.972900390625,
56
+ "learning_rate": 4.482758620689655e-05,
57
+ "loss": 1.4928,
58
+ "step": 174
59
+ },
60
+ {
61
+ "epoch": 0.2413793103448276,
62
+ "grad_norm": 28.061901092529297,
63
+ "learning_rate": 4.396551724137931e-05,
64
+ "loss": 1.1286,
65
+ "step": 203
66
+ },
67
+ {
68
+ "epoch": 0.27586206896551724,
69
+ "grad_norm": 13.678406715393066,
70
+ "learning_rate": 4.3103448275862066e-05,
71
+ "loss": 0.9936,
72
+ "step": 232
73
+ },
74
+ {
75
+ "epoch": 0.3103448275862069,
76
+ "grad_norm": 56.66800308227539,
77
+ "learning_rate": 4.224137931034483e-05,
78
+ "loss": 1.1352,
79
+ "step": 261
80
+ },
81
+ {
82
+ "epoch": 0.3448275862068966,
83
+ "grad_norm": 18.401317596435547,
84
+ "learning_rate": 4.1379310344827587e-05,
85
+ "loss": 1.0754,
86
+ "step": 290
87
+ },
88
+ {
89
+ "epoch": 0.3793103448275862,
90
+ "grad_norm": 28.412200927734375,
91
+ "learning_rate": 4.0517241379310344e-05,
92
+ "loss": 1.0104,
93
+ "step": 319
94
+ },
95
+ {
96
+ "epoch": 0.41379310344827586,
97
+ "grad_norm": 62.137596130371094,
98
+ "learning_rate": 3.965517241379311e-05,
99
+ "loss": 0.9393,
100
+ "step": 348
101
+ },
102
+ {
103
+ "epoch": 0.4482758620689655,
104
+ "grad_norm": 44.91804504394531,
105
+ "learning_rate": 3.8793103448275865e-05,
106
+ "loss": 0.727,
107
+ "step": 377
108
+ },
109
+ {
110
+ "epoch": 0.4827586206896552,
111
+ "grad_norm": 15.308109283447266,
112
+ "learning_rate": 3.793103448275862e-05,
113
+ "loss": 0.8675,
114
+ "step": 406
115
+ },
116
+ {
117
+ "epoch": 0.5172413793103449,
118
+ "grad_norm": 11.947402000427246,
119
+ "learning_rate": 3.7068965517241385e-05,
120
+ "loss": 0.7525,
121
+ "step": 435
122
+ },
123
+ {
124
+ "epoch": 0.5517241379310345,
125
+ "grad_norm": 22.51788902282715,
126
+ "learning_rate": 3.620689655172414e-05,
127
+ "loss": 0.7872,
128
+ "step": 464
129
+ },
130
+ {
131
+ "epoch": 0.5862068965517241,
132
+ "grad_norm": 39.137386322021484,
133
+ "learning_rate": 3.53448275862069e-05,
134
+ "loss": 0.7889,
135
+ "step": 493
136
+ },
137
+ {
138
+ "epoch": 0.6206896551724138,
139
+ "grad_norm": 38.08049774169922,
140
+ "learning_rate": 3.4482758620689657e-05,
141
+ "loss": 0.7347,
142
+ "step": 522
143
+ },
144
+ {
145
+ "epoch": 0.6551724137931034,
146
+ "grad_norm": 10.072871208190918,
147
+ "learning_rate": 3.3620689655172414e-05,
148
+ "loss": 0.7422,
149
+ "step": 551
150
+ },
151
+ {
152
+ "epoch": 0.6896551724137931,
153
+ "grad_norm": 24.6478328704834,
154
+ "learning_rate": 3.275862068965517e-05,
155
+ "loss": 0.7217,
156
+ "step": 580
157
+ },
158
+ {
159
+ "epoch": 0.7241379310344828,
160
+ "grad_norm": 8.815550804138184,
161
+ "learning_rate": 3.1896551724137935e-05,
162
+ "loss": 0.767,
163
+ "step": 609
164
+ },
165
+ {
166
+ "epoch": 0.7586206896551724,
167
+ "grad_norm": 7.418780326843262,
168
+ "learning_rate": 3.103448275862069e-05,
169
+ "loss": 0.7365,
170
+ "step": 638
171
+ },
172
+ {
173
+ "epoch": 0.7931034482758621,
174
+ "grad_norm": 16.163270950317383,
175
+ "learning_rate": 3.017241379310345e-05,
176
+ "loss": 0.6203,
177
+ "step": 667
178
+ },
179
+ {
180
+ "epoch": 0.8275862068965517,
181
+ "grad_norm": 47.155818939208984,
182
+ "learning_rate": 2.9310344827586206e-05,
183
+ "loss": 0.7505,
184
+ "step": 696
185
+ },
186
+ {
187
+ "epoch": 0.8620689655172413,
188
+ "grad_norm": 17.693836212158203,
189
+ "learning_rate": 2.844827586206897e-05,
190
+ "loss": 0.6014,
191
+ "step": 725
192
+ },
193
+ {
194
+ "epoch": 0.896551724137931,
195
+ "grad_norm": 15.081289291381836,
196
+ "learning_rate": 2.7586206896551727e-05,
197
+ "loss": 0.5907,
198
+ "step": 754
199
+ },
200
+ {
201
+ "epoch": 0.9310344827586207,
202
+ "grad_norm": 235.15663146972656,
203
+ "learning_rate": 2.672413793103448e-05,
204
+ "loss": 0.5196,
205
+ "step": 783
206
+ },
207
+ {
208
+ "epoch": 0.9655172413793104,
209
+ "grad_norm": 13.673110961914062,
210
+ "learning_rate": 2.5862068965517244e-05,
211
+ "loss": 0.5441,
212
+ "step": 812
213
+ },
214
+ {
215
+ "epoch": 1.0,
216
+ "grad_norm": 22.076805114746094,
217
+ "learning_rate": 2.5e-05,
218
+ "loss": 0.5455,
219
+ "step": 841
220
+ },
221
+ {
222
+ "epoch": 1.0,
223
+ "eval_cer": 0.020121099208197483,
224
+ "eval_loss": 0.46177592873573303,
225
+ "eval_runtime": 644.2587,
226
+ "eval_samples_per_second": 2.611,
227
+ "eval_steps_per_second": 0.328,
228
+ "step": 841
229
+ },
230
+ {
231
+ "epoch": 1.0344827586206897,
232
+ "grad_norm": 6.016767501831055,
233
+ "learning_rate": 2.413793103448276e-05,
234
+ "loss": 0.416,
235
+ "step": 870
236
+ },
237
+ {
238
+ "epoch": 1.0689655172413792,
239
+ "grad_norm": 5.592987060546875,
240
+ "learning_rate": 2.327586206896552e-05,
241
+ "loss": 0.4161,
242
+ "step": 899
243
+ },
244
+ {
245
+ "epoch": 1.103448275862069,
246
+ "grad_norm": 4.175529479980469,
247
+ "learning_rate": 2.2413793103448276e-05,
248
+ "loss": 0.4516,
249
+ "step": 928
250
+ },
251
+ {
252
+ "epoch": 1.1379310344827587,
253
+ "grad_norm": 7.126400470733643,
254
+ "learning_rate": 2.1551724137931033e-05,
255
+ "loss": 0.4583,
256
+ "step": 957
257
+ },
258
+ {
259
+ "epoch": 1.1724137931034484,
260
+ "grad_norm": 5.696765899658203,
261
+ "learning_rate": 2.0689655172413793e-05,
262
+ "loss": 0.3918,
263
+ "step": 986
264
+ },
265
+ {
266
+ "epoch": 1.206896551724138,
267
+ "grad_norm": 11.261072158813477,
268
+ "learning_rate": 1.9827586206896554e-05,
269
+ "loss": 0.4423,
270
+ "step": 1015
271
+ },
272
+ {
273
+ "epoch": 1.2413793103448276,
274
+ "grad_norm": 3.23542857170105,
275
+ "learning_rate": 1.896551724137931e-05,
276
+ "loss": 0.3769,
277
+ "step": 1044
278
+ },
279
+ {
280
+ "epoch": 1.2758620689655173,
281
+ "grad_norm": 4.922264099121094,
282
+ "learning_rate": 1.810344827586207e-05,
283
+ "loss": 0.4311,
284
+ "step": 1073
285
+ },
286
+ {
287
+ "epoch": 1.3103448275862069,
288
+ "grad_norm": 3.692586898803711,
289
+ "learning_rate": 1.7241379310344828e-05,
290
+ "loss": 0.3667,
291
+ "step": 1102
292
+ },
293
+ {
294
+ "epoch": 1.3448275862068966,
295
+ "grad_norm": 2.88181471824646,
296
+ "learning_rate": 1.6379310344827585e-05,
297
+ "loss": 0.3167,
298
+ "step": 1131
299
+ },
300
+ {
301
+ "epoch": 1.3793103448275863,
302
+ "grad_norm": 3.277984142303467,
303
+ "learning_rate": 1.5517241379310346e-05,
304
+ "loss": 0.3331,
305
+ "step": 1160
306
+ },
307
+ {
308
+ "epoch": 1.4137931034482758,
309
+ "grad_norm": 7.566446304321289,
310
+ "learning_rate": 1.4655172413793103e-05,
311
+ "loss": 0.3046,
312
+ "step": 1189
313
+ },
314
+ {
315
+ "epoch": 1.4482758620689655,
316
+ "grad_norm": 17.953258514404297,
317
+ "learning_rate": 1.3793103448275863e-05,
318
+ "loss": 0.3332,
319
+ "step": 1218
320
+ },
321
+ {
322
+ "epoch": 1.4827586206896552,
323
+ "grad_norm": 11.560026168823242,
324
+ "learning_rate": 1.2931034482758622e-05,
325
+ "loss": 0.3299,
326
+ "step": 1247
327
+ },
328
+ {
329
+ "epoch": 1.5172413793103448,
330
+ "grad_norm": 5.917276859283447,
331
+ "learning_rate": 1.206896551724138e-05,
332
+ "loss": 0.2961,
333
+ "step": 1276
334
+ },
335
+ {
336
+ "epoch": 1.5517241379310345,
337
+ "grad_norm": 3.665133476257324,
338
+ "learning_rate": 1.1206896551724138e-05,
339
+ "loss": 0.3142,
340
+ "step": 1305
341
+ },
342
+ {
343
+ "epoch": 1.5862068965517242,
344
+ "grad_norm": 2.3258779048919678,
345
+ "learning_rate": 1.0344827586206897e-05,
346
+ "loss": 0.3005,
347
+ "step": 1334
348
+ },
349
+ {
350
+ "epoch": 1.6206896551724137,
351
+ "grad_norm": 2.856088638305664,
352
+ "learning_rate": 9.482758620689655e-06,
353
+ "loss": 0.2652,
354
+ "step": 1363
355
+ },
356
+ {
357
+ "epoch": 1.6551724137931034,
358
+ "grad_norm": 8.568778991699219,
359
+ "learning_rate": 8.620689655172414e-06,
360
+ "loss": 0.2652,
361
+ "step": 1392
362
+ },
363
+ {
364
+ "epoch": 1.6896551724137931,
365
+ "grad_norm": 4.4803667068481445,
366
+ "learning_rate": 7.758620689655173e-06,
367
+ "loss": 0.2541,
368
+ "step": 1421
369
+ },
370
+ {
371
+ "epoch": 1.7241379310344827,
372
+ "grad_norm": 13.121492385864258,
373
+ "learning_rate": 6.896551724137932e-06,
374
+ "loss": 0.2754,
375
+ "step": 1450
376
+ },
377
+ {
378
+ "epoch": 1.7586206896551724,
379
+ "grad_norm": 2.48468279838562,
380
+ "learning_rate": 6.03448275862069e-06,
381
+ "loss": 0.2379,
382
+ "step": 1479
383
+ },
384
+ {
385
+ "epoch": 1.793103448275862,
386
+ "grad_norm": 1.497287631034851,
387
+ "learning_rate": 5.172413793103448e-06,
388
+ "loss": 0.2273,
389
+ "step": 1508
390
+ },
391
+ {
392
+ "epoch": 1.8275862068965516,
393
+ "grad_norm": 2.972078800201416,
394
+ "learning_rate": 4.310344827586207e-06,
395
+ "loss": 0.2254,
396
+ "step": 1537
397
+ },
398
+ {
399
+ "epoch": 1.8620689655172413,
400
+ "grad_norm": 12.911340713500977,
401
+ "learning_rate": 3.448275862068966e-06,
402
+ "loss": 0.2448,
403
+ "step": 1566
404
+ },
405
+ {
406
+ "epoch": 1.896551724137931,
407
+ "grad_norm": 1.3689017295837402,
408
+ "learning_rate": 2.586206896551724e-06,
409
+ "loss": 0.2089,
410
+ "step": 1595
411
+ },
412
+ {
413
+ "epoch": 1.9310344827586206,
414
+ "grad_norm": 4.04969596862793,
415
+ "learning_rate": 1.724137931034483e-06,
416
+ "loss": 0.2174,
417
+ "step": 1624
418
+ },
419
+ {
420
+ "epoch": 1.9655172413793105,
421
+ "grad_norm": 2.9180474281311035,
422
+ "learning_rate": 8.620689655172415e-07,
423
+ "loss": 0.2381,
424
+ "step": 1653
425
+ },
426
+ {
427
+ "epoch": 2.0,
428
+ "grad_norm": 4.1190409660339355,
429
+ "learning_rate": 0.0,
430
+ "loss": 0.2068,
431
+ "step": 1682
432
+ },
433
  {
434
  "epoch": 2.0,
435
+ "eval_cer": 0.0032914143766495886,
436
+ "eval_loss": 0.23910197615623474,
437
+ "eval_runtime": 636.093,
438
+ "eval_samples_per_second": 2.644,
439
+ "eval_steps_per_second": 0.332,
440
+ "step": 1682
441
  },
442
  {
443
  "epoch": 2.0,
444
+ "step": 1682,
445
+ "total_flos": 1.9906356553640313e+19,
446
+ "train_loss": 0.6271170827069549,
447
+ "train_runtime": 2733.4434,
448
+ "train_samples_per_second": 4.92,
449
+ "train_steps_per_second": 0.615
450
  }
451
  ],
452
  "logging_steps": 29,
453
+ "max_steps": 1682,
454
  "num_input_tokens_seen": 0,
455
  "num_train_epochs": 2,
456
  "save_steps": 500,
 
466
  "attributes": {}
467
  }
468
  },
469
+ "total_flos": 1.9906356553640313e+19,
470
+ "train_batch_size": 8,
471
  "trial_name": null,
472
  "trial_params": null
473
  }