ggkk2012 commited on
Commit
ebb9229
·
verified ·
1 Parent(s): 8003c4b

Delete trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -695
trainer_state.json DELETED
@@ -1,695 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 5.0,
5
- "eval_steps": 500,
6
- "global_step": 9580,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.05,
13
- "grad_norm": 1.0843530893325806,
14
- "learning_rate": 0.00019994623498004714,
15
- "loss": 1.8222,
16
- "step": 100
17
- },
18
- {
19
- "epoch": 0.1,
20
- "grad_norm": 0.9015785455703735,
21
- "learning_rate": 0.00019978499773373596,
22
- "loss": 1.8565,
23
- "step": 200
24
- },
25
- {
26
- "epoch": 0.16,
27
- "grad_norm": 1.2422339916229248,
28
- "learning_rate": 0.00019951646163954176,
29
- "loss": 1.7994,
30
- "step": 300
31
- },
32
- {
33
- "epoch": 0.21,
34
- "grad_norm": 1.0499353408813477,
35
- "learning_rate": 0.0001991409154544338,
36
- "loss": 1.6984,
37
- "step": 400
38
- },
39
- {
40
- "epoch": 0.26,
41
- "grad_norm": 1.122926950454712,
42
- "learning_rate": 0.00019865876300337478,
43
- "loss": 1.6973,
44
- "step": 500
45
- },
46
- {
47
- "epoch": 0.31,
48
- "grad_norm": 1.3406814336776733,
49
- "learning_rate": 0.00019807052274508773,
50
- "loss": 1.7661,
51
- "step": 600
52
- },
53
- {
54
- "epoch": 0.37,
55
- "grad_norm": 0.7354043126106262,
56
- "learning_rate": 0.00019737682721455714,
57
- "loss": 1.6948,
58
- "step": 700
59
- },
60
- {
61
- "epoch": 0.42,
62
- "grad_norm": 2.118002414703369,
63
- "learning_rate": 0.0001965784223428638,
64
- "loss": 1.8664,
65
- "step": 800
66
- },
67
- {
68
- "epoch": 0.47,
69
- "grad_norm": 1.6859912872314453,
70
- "learning_rate": 0.00019567616665508485,
71
- "loss": 1.7087,
72
- "step": 900
73
- },
74
- {
75
- "epoch": 0.52,
76
- "grad_norm": 1.2144606113433838,
77
- "learning_rate": 0.0001946710303471214,
78
- "loss": 1.6826,
79
- "step": 1000
80
- },
81
- {
82
- "epoch": 0.57,
83
- "grad_norm": 1.680190920829773,
84
- "learning_rate": 0.00019356409424244655,
85
- "loss": 1.5399,
86
- "step": 1100
87
- },
88
- {
89
- "epoch": 0.63,
90
- "grad_norm": 1.834701418876648,
91
- "learning_rate": 0.00019235654862989537,
92
- "loss": 1.6394,
93
- "step": 1200
94
- },
95
- {
96
- "epoch": 0.68,
97
- "grad_norm": 1.9838495254516602,
98
- "learning_rate": 0.00019104969198374688,
99
- "loss": 1.645,
100
- "step": 1300
101
- },
102
- {
103
- "epoch": 0.73,
104
- "grad_norm": 2.141674280166626,
105
- "learning_rate": 0.00018964492956747425,
106
- "loss": 1.5853,
107
- "step": 1400
108
- },
109
- {
110
- "epoch": 0.78,
111
- "grad_norm": 2.6233127117156982,
112
- "learning_rate": 0.00018814377192266423,
113
- "loss": 1.5778,
114
- "step": 1500
115
- },
116
- {
117
- "epoch": 0.84,
118
- "grad_norm": 1.6887329816818237,
119
- "learning_rate": 0.00018654783324473137,
120
- "loss": 1.6368,
121
- "step": 1600
122
- },
123
- {
124
- "epoch": 0.89,
125
- "grad_norm": 3.113426446914673,
126
- "learning_rate": 0.00018487617447307124,
127
- "loss": 1.6269,
128
- "step": 1700
129
- },
130
- {
131
- "epoch": 0.94,
132
- "grad_norm": 2.1365339756011963,
133
- "learning_rate": 0.00018309682531549338,
134
- "loss": 1.7073,
135
- "step": 1800
136
- },
137
- {
138
- "epoch": 0.99,
139
- "grad_norm": 2.3787343502044678,
140
- "learning_rate": 0.00018122812210849337,
141
- "loss": 1.5544,
142
- "step": 1900
143
- },
144
- {
145
- "epoch": 1.04,
146
- "grad_norm": 2.1635191440582275,
147
- "learning_rate": 0.00017927207426937544,
148
- "loss": 1.3285,
149
- "step": 2000
150
- },
151
- {
152
- "epoch": 1.1,
153
- "grad_norm": 2.331688642501831,
154
- "learning_rate": 0.00017723078513716157,
155
- "loss": 1.2529,
156
- "step": 2100
157
- },
158
- {
159
- "epoch": 1.15,
160
- "grad_norm": 5.185802936553955,
161
- "learning_rate": 0.00017510644971087015,
162
- "loss": 1.3708,
163
- "step": 2200
164
- },
165
- {
166
- "epoch": 1.2,
167
- "grad_norm": 3.1339027881622314,
168
- "learning_rate": 0.0001729013522892329,
169
- "loss": 1.3309,
170
- "step": 2300
171
- },
172
- {
173
- "epoch": 1.25,
174
- "grad_norm": 3.4236795902252197,
175
- "learning_rate": 0.0001706178640143872,
176
- "loss": 1.4201,
177
- "step": 2400
178
- },
179
- {
180
- "epoch": 1.3,
181
- "grad_norm": 3.849432945251465,
182
- "learning_rate": 0.00016825844032218625,
183
- "loss": 1.2956,
184
- "step": 2500
185
- },
186
- {
187
- "epoch": 1.36,
188
- "grad_norm": 3.8217601776123047,
189
- "learning_rate": 0.00016582561830186785,
190
- "loss": 1.3375,
191
- "step": 2600
192
- },
193
- {
194
- "epoch": 1.41,
195
- "grad_norm": 1.5418843030929565,
196
- "learning_rate": 0.00016332201396792123,
197
- "loss": 1.2897,
198
- "step": 2700
199
- },
200
- {
201
- "epoch": 1.46,
202
- "grad_norm": 2.2709996700286865,
203
- "learning_rate": 0.00016075031944708584,
204
- "loss": 1.3323,
205
- "step": 2800
206
- },
207
- {
208
- "epoch": 1.51,
209
- "grad_norm": 4.869096279144287,
210
- "learning_rate": 0.0001581133000835061,
211
- "loss": 1.2841,
212
- "step": 2900
213
- },
214
- {
215
- "epoch": 1.57,
216
- "grad_norm": 3.3247766494750977,
217
- "learning_rate": 0.00015541379146515603,
218
- "loss": 1.2666,
219
- "step": 3000
220
- },
221
- {
222
- "epoch": 1.62,
223
- "grad_norm": 4.0471110343933105,
224
- "learning_rate": 0.0001526546963747302,
225
- "loss": 1.2336,
226
- "step": 3100
227
- },
228
- {
229
- "epoch": 1.67,
230
- "grad_norm": 2.8835229873657227,
231
- "learning_rate": 0.00014986740918973633,
232
- "loss": 1.2627,
233
- "step": 3200
234
- },
235
- {
236
- "epoch": 1.72,
237
- "grad_norm": 4.162999629974365,
238
- "learning_rate": 0.00014699862334591993,
239
- "loss": 1.2683,
240
- "step": 3300
241
- },
242
- {
243
- "epoch": 1.77,
244
- "grad_norm": 2.161064624786377,
245
- "learning_rate": 0.00014407929986366458,
246
- "loss": 1.3076,
247
- "step": 3400
248
- },
249
- {
250
- "epoch": 1.83,
251
- "grad_norm": 2.2110209465026855,
252
- "learning_rate": 0.0001411125778926756,
253
- "loss": 1.3296,
254
- "step": 3500
255
- },
256
- {
257
- "epoch": 1.88,
258
- "grad_norm": 3.9512176513671875,
259
- "learning_rate": 0.0001381016475502724,
260
- "loss": 1.2321,
261
- "step": 3600
262
- },
263
- {
264
- "epoch": 1.93,
265
- "grad_norm": 1.0244474411010742,
266
- "learning_rate": 0.00013504974649105364,
267
- "loss": 1.1754,
268
- "step": 3700
269
- },
270
- {
271
- "epoch": 1.98,
272
- "grad_norm": 4.0455217361450195,
273
- "learning_rate": 0.0001319601564254462,
274
- "loss": 1.2665,
275
- "step": 3800
276
- },
277
- {
278
- "epoch": 2.04,
279
- "grad_norm": 4.162383556365967,
280
- "learning_rate": 0.00012883619959088054,
281
- "loss": 1.0379,
282
- "step": 3900
283
- },
284
- {
285
- "epoch": 2.09,
286
- "grad_norm": 2.3285574913024902,
287
- "learning_rate": 0.0001256812351793875,
288
- "loss": 0.8172,
289
- "step": 4000
290
- },
291
- {
292
- "epoch": 2.14,
293
- "grad_norm": 5.439870834350586,
294
- "learning_rate": 0.0001224986557254578,
295
- "loss": 0.8519,
296
- "step": 4100
297
- },
298
- {
299
- "epoch": 2.19,
300
- "grad_norm": 2.932978391647339,
301
- "learning_rate": 0.00011929188345804825,
302
- "loss": 0.8636,
303
- "step": 4200
304
- },
305
- {
306
- "epoch": 2.24,
307
- "grad_norm": 5.512314319610596,
308
- "learning_rate": 0.00011606436662065767,
309
- "loss": 0.8727,
310
- "step": 4300
311
- },
312
- {
313
- "epoch": 2.3,
314
- "grad_norm": 4.129833698272705,
315
- "learning_rate": 0.00011281957576342934,
316
- "loss": 0.8618,
317
- "step": 4400
318
- },
319
- {
320
- "epoch": 2.35,
321
- "grad_norm": 2.5295193195343018,
322
- "learning_rate": 0.00010956100001126682,
323
- "loss": 0.7976,
324
- "step": 4500
325
- },
326
- {
327
- "epoch": 2.4,
328
- "grad_norm": 4.148761749267578,
329
- "learning_rate": 0.00010629214331197683,
330
- "loss": 0.7871,
331
- "step": 4600
332
- },
333
- {
334
- "epoch": 2.45,
335
- "grad_norm": 5.00084924697876,
336
- "learning_rate": 0.00010301652066847249,
337
- "loss": 0.8557,
338
- "step": 4700
339
- },
340
- {
341
- "epoch": 2.51,
342
- "grad_norm": 5.721644878387451,
343
- "learning_rate": 9.973765435908962e-05,
344
- "loss": 0.8898,
345
- "step": 4800
346
- },
347
- {
348
- "epoch": 2.56,
349
- "grad_norm": 5.872501850128174,
350
- "learning_rate": 9.64590701500791e-05,
351
- "loss": 0.8269,
352
- "step": 4900
353
- },
354
- {
355
- "epoch": 2.61,
356
- "grad_norm": 5.41200065612793,
357
- "learning_rate": 9.318429350434922e-05,
358
- "loss": 0.7502,
359
- "step": 5000
360
- },
361
- {
362
- "epoch": 2.66,
363
- "grad_norm": 3.3696563243865967,
364
- "learning_rate": 8.991684579053403e-05,
365
- "loss": 0.8201,
366
- "step": 5100
367
- },
368
- {
369
- "epoch": 2.71,
370
- "grad_norm": 3.6817996501922607,
371
- "learning_rate": 8.666024049646397e-05,
372
- "loss": 0.8618,
373
- "step": 5200
374
- },
375
- {
376
- "epoch": 2.77,
377
- "grad_norm": 4.630326747894287,
378
- "learning_rate": 8.341797945111142e-05,
379
- "loss": 0.8255,
380
- "step": 5300
381
- },
382
- {
383
- "epoch": 2.82,
384
- "grad_norm": 2.672938823699951,
385
- "learning_rate": 8.019354905907224e-05,
386
- "loss": 0.759,
387
- "step": 5400
388
- },
389
- {
390
- "epoch": 2.87,
391
- "grad_norm": 3.0967512130737305,
392
- "learning_rate": 7.699041655163378e-05,
393
- "loss": 0.8287,
394
- "step": 5500
395
- },
396
- {
397
- "epoch": 2.92,
398
- "grad_norm": 4.052151679992676,
399
- "learning_rate": 7.381202625845948e-05,
400
- "loss": 0.8356,
401
- "step": 5600
402
- },
403
- {
404
- "epoch": 2.97,
405
- "grad_norm": 3.5286691188812256,
406
- "learning_rate": 7.066179590389994e-05,
407
- "loss": 0.9181,
408
- "step": 5700
409
- },
410
- {
411
- "epoch": 3.03,
412
- "grad_norm": 4.84535026550293,
413
- "learning_rate": 6.754311293191257e-05,
414
- "loss": 0.5811,
415
- "step": 5800
416
- },
417
- {
418
- "epoch": 3.08,
419
- "grad_norm": 2.9033737182617188,
420
- "learning_rate": 6.44593308635417e-05,
421
- "loss": 0.4604,
422
- "step": 5900
423
- },
424
- {
425
- "epoch": 3.13,
426
- "grad_norm": 1.6759638786315918,
427
- "learning_rate": 6.1413765690876e-05,
428
- "loss": 0.412,
429
- "step": 6000
430
- },
431
- {
432
- "epoch": 3.18,
433
- "grad_norm": 3.905642032623291,
434
- "learning_rate": 5.840969231136102e-05,
435
- "loss": 0.4478,
436
- "step": 6100
437
- },
438
- {
439
- "epoch": 3.24,
440
- "grad_norm": 7.030938148498535,
441
- "learning_rate": 5.5450341006300535e-05,
442
- "loss": 0.3905,
443
- "step": 6200
444
- },
445
- {
446
- "epoch": 3.29,
447
- "grad_norm": 5.1767988204956055,
448
- "learning_rate": 5.2538893967333866e-05,
449
- "loss": 0.401,
450
- "step": 6300
451
- },
452
- {
453
- "epoch": 3.34,
454
- "grad_norm": 2.0529091358184814,
455
- "learning_rate": 4.9678481874623836e-05,
456
- "loss": 0.417,
457
- "step": 6400
458
- },
459
- {
460
- "epoch": 3.39,
461
- "grad_norm": 4.730635643005371,
462
- "learning_rate": 4.687218053043516e-05,
463
- "loss": 0.4213,
464
- "step": 6500
465
- },
466
- {
467
- "epoch": 3.44,
468
- "grad_norm": 5.270570278167725,
469
- "learning_rate": 4.412300755172314e-05,
470
- "loss": 0.4371,
471
- "step": 6600
472
- },
473
- {
474
- "epoch": 3.5,
475
- "grad_norm": 5.049856185913086,
476
- "learning_rate": 4.1433919125288914e-05,
477
- "loss": 0.4686,
478
- "step": 6700
479
- },
480
- {
481
- "epoch": 3.55,
482
- "grad_norm": 5.543980121612549,
483
- "learning_rate": 3.8807806828990455e-05,
484
- "loss": 0.4017,
485
- "step": 6800
486
- },
487
- {
488
- "epoch": 3.6,
489
- "grad_norm": 5.048495292663574,
490
- "learning_rate": 3.624749452242799e-05,
491
- "loss": 0.4227,
492
- "step": 6900
493
- },
494
- {
495
- "epoch": 3.65,
496
- "grad_norm": 6.684240818023682,
497
- "learning_rate": 3.375573531044645e-05,
498
- "loss": 0.4035,
499
- "step": 7000
500
- },
501
- {
502
- "epoch": 3.71,
503
- "grad_norm": 6.066633701324463,
504
- "learning_rate": 3.1335208582720856e-05,
505
- "loss": 0.4271,
506
- "step": 7100
507
- },
508
- {
509
- "epoch": 3.76,
510
- "grad_norm": 4.037696361541748,
511
- "learning_rate": 2.8988517132607428e-05,
512
- "loss": 0.4546,
513
- "step": 7200
514
- },
515
- {
516
- "epoch": 3.81,
517
- "grad_norm": 4.5075602531433105,
518
- "learning_rate": 2.6718184358358956e-05,
519
- "loss": 0.3988,
520
- "step": 7300
521
- },
522
- {
523
- "epoch": 3.86,
524
- "grad_norm": 2.9646005630493164,
525
- "learning_rate": 2.4526651549713608e-05,
526
- "loss": 0.3839,
527
- "step": 7400
528
- },
529
- {
530
- "epoch": 3.91,
531
- "grad_norm": 6.707378387451172,
532
- "learning_rate": 2.2436969776075456e-05,
533
- "loss": 0.3956,
534
- "step": 7500
535
- },
536
- {
537
- "epoch": 3.97,
538
- "grad_norm": 6.770478248596191,
539
- "learning_rate": 2.040917410080746e-05,
540
- "loss": 0.4,
541
- "step": 7600
542
- },
543
- {
544
- "epoch": 4.02,
545
- "grad_norm": 3.7659125328063965,
546
- "learning_rate": 1.8466962472390136e-05,
547
- "loss": 0.3287,
548
- "step": 7700
549
- },
550
- {
551
- "epoch": 4.07,
552
- "grad_norm": 2.905404567718506,
553
- "learning_rate": 1.661242335176261e-05,
554
- "loss": 0.2069,
555
- "step": 7800
556
- },
557
- {
558
- "epoch": 4.12,
559
- "grad_norm": 5.363249778747559,
560
- "learning_rate": 1.4847550925581377e-05,
561
- "loss": 0.1645,
562
- "step": 7900
563
- },
564
- {
565
- "epoch": 4.18,
566
- "grad_norm": 5.363257884979248,
567
- "learning_rate": 1.3174242961870542e-05,
568
- "loss": 0.1888,
569
- "step": 8000
570
- },
571
- {
572
- "epoch": 4.23,
573
- "grad_norm": 2.431536912918091,
574
- "learning_rate": 1.1594298769351286e-05,
575
- "loss": 0.1644,
576
- "step": 8100
577
- },
578
- {
579
- "epoch": 4.28,
580
- "grad_norm": 2.4400837421417236,
581
- "learning_rate": 1.0109417262644261e-05,
582
- "loss": 0.2169,
583
- "step": 8200
584
- },
585
- {
586
- "epoch": 4.33,
587
- "grad_norm": 8.288840293884277,
588
- "learning_rate": 8.72119513542623e-06,
589
- "loss": 0.1992,
590
- "step": 8300
591
- },
592
- {
593
- "epoch": 4.38,
594
- "grad_norm": 3.36771297454834,
595
- "learning_rate": 7.431125143504525e-06,
596
- "loss": 0.2011,
597
- "step": 8400
598
- },
599
- {
600
- "epoch": 4.44,
601
- "grad_norm": 0.056750617921352386,
602
- "learning_rate": 6.240594499656316e-06,
603
- "loss": 0.1783,
604
- "step": 8500
605
- },
606
- {
607
- "epoch": 4.49,
608
- "grad_norm": 4.8992743492126465,
609
- "learning_rate": 5.150883381957983e-06,
610
- "loss": 0.1813,
611
- "step": 8600
612
- },
613
- {
614
- "epoch": 4.54,
615
- "grad_norm": 2.5150370597839355,
616
- "learning_rate": 4.1631635572092706e-06,
617
- "loss": 0.2027,
618
- "step": 8700
619
- },
620
- {
621
- "epoch": 4.59,
622
- "grad_norm": 4.545584201812744,
623
- "learning_rate": 3.2784971209318673e-06,
624
- "loss": 0.216,
625
- "step": 8800
626
- },
627
- {
628
- "epoch": 4.65,
629
- "grad_norm": 6.020129680633545,
630
- "learning_rate": 2.4978353552977398e-06,
631
- "loss": 0.202,
632
- "step": 8900
633
- },
634
- {
635
- "epoch": 4.7,
636
- "grad_norm": 5.093991279602051,
637
- "learning_rate": 1.822017706215029e-06,
638
- "loss": 0.1762,
639
- "step": 9000
640
- },
641
- {
642
- "epoch": 4.75,
643
- "grad_norm": 2.0878984928131104,
644
- "learning_rate": 1.2517708806714657e-06,
645
- "loss": 0.217,
646
- "step": 9100
647
- },
648
- {
649
- "epoch": 4.8,
650
- "grad_norm": 2.145759105682373,
651
- "learning_rate": 7.877080653061031e-07,
652
- "loss": 0.2045,
653
- "step": 9200
654
- },
655
- {
656
- "epoch": 4.85,
657
- "grad_norm": 8.151817321777344,
658
- "learning_rate": 4.303282670495068e-07,
659
- "loss": 0.2263,
660
- "step": 9300
661
- },
662
- {
663
- "epoch": 4.91,
664
- "grad_norm": 6.520354270935059,
665
- "learning_rate": 1.800157765413535e-07,
666
- "loss": 0.192,
667
- "step": 9400
668
- },
669
- {
670
- "epoch": 4.96,
671
- "grad_norm": 7.271907806396484,
672
- "learning_rate": 3.703975490257916e-08,
673
- "loss": 0.2436,
674
- "step": 9500
675
- },
676
- {
677
- "epoch": 5.0,
678
- "step": 9580,
679
- "total_flos": 1.5336527740560998e+18,
680
- "train_loss": 0.886494568245396,
681
- "train_runtime": 33369.3902,
682
- "train_samples_per_second": 1.148,
683
- "train_steps_per_second": 0.287
684
- }
685
- ],
686
- "logging_steps": 100,
687
- "max_steps": 9580,
688
- "num_input_tokens_seen": 0,
689
- "num_train_epochs": 5,
690
- "save_steps": 1000,
691
- "total_flos": 1.5336527740560998e+18,
692
- "train_batch_size": 4,
693
- "trial_name": null,
694
- "trial_params": null
695
- }