{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.5769674590353104,
  "eval_steps": 500,
  "global_step": 10000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0005769674590353104,
      "grad_norm": 0.7104284763336182,
      "learning_rate": 3.846153846153847e-07,
      "loss": 2.6483,
      "step": 10
    },
    {
      "epoch": 0.0011539349180706207,
      "grad_norm": 0.6569196581840515,
      "learning_rate": 7.692307692307694e-07,
      "loss": 2.6474,
      "step": 20
    },
    {
      "epoch": 0.0017309023771059312,
      "grad_norm": 0.5456475615501404,
      "learning_rate": 1.153846153846154e-06,
      "loss": 2.6342,
      "step": 30
    },
    {
      "epoch": 0.0023078698361412415,
      "grad_norm": 0.3985019028186798,
      "learning_rate": 1.5384615384615387e-06,
      "loss": 2.6414,
      "step": 40
    },
    {
      "epoch": 0.002884837295176552,
      "grad_norm": 0.48592114448547363,
      "learning_rate": 1.9230769230769234e-06,
      "loss": 2.6257,
      "step": 50
    },
    {
      "epoch": 0.0034618047542118624,
      "grad_norm": 0.32980403304100037,
      "learning_rate": 2.307692307692308e-06,
      "loss": 2.5972,
      "step": 60
    },
    {
      "epoch": 0.004038772213247173,
      "grad_norm": 0.3044310510158539,
      "learning_rate": 2.6923076923076928e-06,
      "loss": 2.6144,
      "step": 70
    },
    {
      "epoch": 0.004615739672282483,
      "grad_norm": 0.2920003831386566,
      "learning_rate": 3.0769230769230774e-06,
      "loss": 2.6037,
      "step": 80
    },
    {
      "epoch": 0.005192707131317794,
      "grad_norm": 0.28606897592544556,
      "learning_rate": 3.4615384615384617e-06,
      "loss": 2.5913,
      "step": 90
    },
    {
      "epoch": 0.005769674590353104,
      "grad_norm": 0.2709102928638458,
      "learning_rate": 3.846153846153847e-06,
      "loss": 2.5825,
      "step": 100
    },
    {
      "epoch": 0.006346642049388415,
      "grad_norm": 0.28747376799583435,
      "learning_rate": 4.230769230769231e-06,
      "loss": 2.5782,
      "step": 110
    },
    {
      "epoch": 0.006923609508423725,
      "grad_norm": 0.2798466384410858,
      "learning_rate": 4.615384615384616e-06,
      "loss": 2.5689,
      "step": 120
    },
    {
      "epoch": 0.007500576967459035,
      "grad_norm": 0.27855777740478516,
      "learning_rate": 5e-06,
      "loss": 2.5601,
      "step": 130
    },
    {
      "epoch": 0.008077544426494346,
      "grad_norm": 0.29982247948646545,
      "learning_rate": 5.3846153846153855e-06,
      "loss": 2.5455,
      "step": 140
    },
    {
      "epoch": 0.008654511885529657,
      "grad_norm": 0.2967548668384552,
      "learning_rate": 5.76923076923077e-06,
      "loss": 2.5456,
      "step": 150
    },
    {
      "epoch": 0.009231479344564966,
      "grad_norm": 0.4355414807796478,
      "learning_rate": 6.153846153846155e-06,
      "loss": 2.5428,
      "step": 160
    },
    {
      "epoch": 0.009808446803600277,
      "grad_norm": 0.2910907566547394,
      "learning_rate": 6.538461538461539e-06,
      "loss": 2.543,
      "step": 170
    },
    {
      "epoch": 0.010385414262635588,
      "grad_norm": 0.28916531801223755,
      "learning_rate": 6.923076923076923e-06,
      "loss": 2.5261,
      "step": 180
    },
    {
      "epoch": 0.010962381721670899,
      "grad_norm": 0.4095863699913025,
      "learning_rate": 7.3076923076923085e-06,
      "loss": 2.5241,
      "step": 190
    },
    {
      "epoch": 0.011539349180706208,
      "grad_norm": 0.4236544668674469,
      "learning_rate": 7.692307692307694e-06,
      "loss": 2.534,
      "step": 200
    },
    {
      "epoch": 0.012116316639741519,
      "grad_norm": 0.3286638557910919,
      "learning_rate": 8.076923076923077e-06,
      "loss": 2.5113,
      "step": 210
    },
    {
      "epoch": 0.01269328409877683,
      "grad_norm": 0.36950576305389404,
      "learning_rate": 8.461538461538462e-06,
      "loss": 2.5166,
      "step": 220
    },
    {
      "epoch": 0.013270251557812139,
      "grad_norm": 0.3060932755470276,
      "learning_rate": 8.846153846153847e-06,
      "loss": 2.4978,
      "step": 230
    },
    {
      "epoch": 0.01384721901684745,
      "grad_norm": 0.29038047790527344,
      "learning_rate": 9.230769230769232e-06,
      "loss": 2.5106,
      "step": 240
    },
    {
      "epoch": 0.01442418647588276,
      "grad_norm": 0.3052986264228821,
      "learning_rate": 9.615384615384616e-06,
      "loss": 2.4951,
      "step": 250
    },
    {
      "epoch": 0.01500115393491807,
      "grad_norm": 0.314464271068573,
      "learning_rate": 1e-05,
      "loss": 2.4922,
      "step": 260
    },
    {
      "epoch": 0.01557812139395338,
      "grad_norm": 0.3013986051082611,
      "learning_rate": 1.0384615384615386e-05,
      "loss": 2.4956,
      "step": 270
    },
    {
      "epoch": 0.01615508885298869,
      "grad_norm": 0.33075615763664246,
      "learning_rate": 1.0769230769230771e-05,
      "loss": 2.4814,
      "step": 280
    },
    {
      "epoch": 0.016732056312024002,
      "grad_norm": 0.4657692313194275,
      "learning_rate": 1.1153846153846154e-05,
      "loss": 2.4951,
      "step": 290
    },
    {
      "epoch": 0.017309023771059313,
      "grad_norm": 0.40145745873451233,
      "learning_rate": 1.153846153846154e-05,
      "loss": 2.4692,
      "step": 300
    },
    {
      "epoch": 0.017885991230094624,
      "grad_norm": 0.3732956349849701,
      "learning_rate": 1.1923076923076925e-05,
      "loss": 2.4855,
      "step": 310
    },
    {
      "epoch": 0.018462958689129932,
      "grad_norm": 0.3338468074798584,
      "learning_rate": 1.230769230769231e-05,
      "loss": 2.4902,
      "step": 320
    },
    {
      "epoch": 0.019039926148165243,
      "grad_norm": 0.3962608277797699,
      "learning_rate": 1.2692307692307691e-05,
      "loss": 2.4725,
      "step": 330
    },
    {
      "epoch": 0.019616893607200554,
      "grad_norm": 0.34549835324287415,
      "learning_rate": 1.3076923076923078e-05,
      "loss": 2.4665,
      "step": 340
    },
    {
      "epoch": 0.020193861066235864,
      "grad_norm": 0.5531402826309204,
      "learning_rate": 1.3461538461538462e-05,
      "loss": 2.4784,
      "step": 350
    },
    {
      "epoch": 0.020770828525271175,
      "grad_norm": 0.3071824610233307,
      "learning_rate": 1.3846153846153847e-05,
      "loss": 2.4665,
      "step": 360
    },
    {
      "epoch": 0.021347795984306486,
      "grad_norm": 0.32911962270736694,
      "learning_rate": 1.423076923076923e-05,
      "loss": 2.4597,
      "step": 370
    },
    {
      "epoch": 0.021924763443341797,
      "grad_norm": 0.354469358921051,
      "learning_rate": 1.4615384615384617e-05,
      "loss": 2.4638,
      "step": 380
    },
    {
      "epoch": 0.022501730902377105,
      "grad_norm": 0.3209117650985718,
      "learning_rate": 1.5e-05,
      "loss": 2.4492,
      "step": 390
    },
    {
      "epoch": 0.023078698361412416,
      "grad_norm": 0.44386786222457886,
      "learning_rate": 1.5384615384615387e-05,
      "loss": 2.4579,
      "step": 400
    },
    {
      "epoch": 0.023655665820447726,
      "grad_norm": 0.4315970838069916,
      "learning_rate": 1.576923076923077e-05,
      "loss": 2.4712,
      "step": 410
    },
    {
      "epoch": 0.024232633279483037,
      "grad_norm": 0.345197468996048,
      "learning_rate": 1.6153846153846154e-05,
      "loss": 2.4637,
      "step": 420
    },
    {
      "epoch": 0.02480960073851835,
      "grad_norm": 0.43101733922958374,
      "learning_rate": 1.653846153846154e-05,
      "loss": 2.4584,
      "step": 430
    },
    {
      "epoch": 0.02538656819755366,
      "grad_norm": 0.3445722460746765,
      "learning_rate": 1.6923076923076924e-05,
      "loss": 2.4607,
      "step": 440
    },
    {
      "epoch": 0.02596353565658897,
      "grad_norm": 0.38286539912223816,
      "learning_rate": 1.730769230769231e-05,
      "loss": 2.4451,
      "step": 450
    },
    {
      "epoch": 0.026540503115624278,
      "grad_norm": 0.7410378456115723,
      "learning_rate": 1.7692307692307694e-05,
      "loss": 2.4443,
      "step": 460
    },
    {
      "epoch": 0.02711747057465959,
      "grad_norm": 0.36650899052619934,
      "learning_rate": 1.8076923076923076e-05,
      "loss": 2.4513,
      "step": 470
    },
    {
      "epoch": 0.0276944380336949,
      "grad_norm": 0.33747929334640503,
      "learning_rate": 1.8461538461538465e-05,
      "loss": 2.4286,
      "step": 480
    },
    {
      "epoch": 0.02827140549273021,
      "grad_norm": 0.3847596347332001,
      "learning_rate": 1.8846153846153846e-05,
      "loss": 2.4371,
      "step": 490
    },
    {
      "epoch": 0.02884837295176552,
      "grad_norm": 0.3613141179084778,
      "learning_rate": 1.923076923076923e-05,
      "loss": 2.4316,
      "step": 500
    },
    {
      "epoch": 0.029425340410800832,
      "grad_norm": 0.515495240688324,
      "learning_rate": 1.9615384615384617e-05,
      "loss": 2.4454,
      "step": 510
    },
    {
      "epoch": 0.03000230786983614,
      "grad_norm": 0.4233556389808655,
      "learning_rate": 2e-05,
      "loss": 2.4263,
      "step": 520
    },
    {
      "epoch": 0.03057927532887145,
      "grad_norm": 0.34941181540489197,
      "learning_rate": 2.0384615384615387e-05,
      "loss": 2.4429,
      "step": 530
    },
    {
      "epoch": 0.03115624278790676,
      "grad_norm": 0.47630569338798523,
      "learning_rate": 2.0769230769230772e-05,
      "loss": 2.4181,
      "step": 540
    },
    {
      "epoch": 0.031733210246942076,
      "grad_norm": 0.6628788709640503,
      "learning_rate": 2.1153846153846154e-05,
      "loss": 2.4181,
      "step": 550
    },
    {
      "epoch": 0.03231017770597738,
      "grad_norm": 0.6385440826416016,
      "learning_rate": 2.1538461538461542e-05,
      "loss": 2.4207,
      "step": 560
    },
    {
      "epoch": 0.03288714516501269,
      "grad_norm": 0.35360780358314514,
      "learning_rate": 2.1923076923076924e-05,
      "loss": 2.435,
      "step": 570
    },
    {
      "epoch": 0.033464112624048005,
      "grad_norm": 0.6603436470031738,
      "learning_rate": 2.230769230769231e-05,
      "loss": 2.413,
      "step": 580
    },
    {
      "epoch": 0.03404108008308331,
      "grad_norm": 0.49220070242881775,
      "learning_rate": 2.2692307692307694e-05,
      "loss": 2.4161,
      "step": 590
    },
    {
      "epoch": 0.03461804754211863,
      "grad_norm": 0.33461564779281616,
      "learning_rate": 2.307692307692308e-05,
      "loss": 2.4363,
      "step": 600
    },
    {
      "epoch": 0.035195015001153934,
      "grad_norm": 0.3938097357749939,
      "learning_rate": 2.3461538461538464e-05,
      "loss": 2.4059,
      "step": 610
    },
    {
      "epoch": 0.03577198246018925,
      "grad_norm": 0.559905469417572,
      "learning_rate": 2.384615384615385e-05,
      "loss": 2.4163,
      "step": 620
    },
    {
      "epoch": 0.036348949919224556,
      "grad_norm": 0.4643310606479645,
      "learning_rate": 2.423076923076923e-05,
      "loss": 2.4223,
      "step": 630
    },
    {
      "epoch": 0.036925917378259863,
      "grad_norm": 0.44358041882514954,
      "learning_rate": 2.461538461538462e-05,
      "loss": 2.4181,
      "step": 640
    },
    {
      "epoch": 0.03750288483729518,
      "grad_norm": 0.4786848723888397,
      "learning_rate": 2.5e-05,
      "loss": 2.419,
      "step": 650
    },
    {
      "epoch": 0.038079852296330485,
      "grad_norm": 2.538026809692383,
      "learning_rate": 2.5384615384615383e-05,
      "loss": 2.4174,
      "step": 660
    },
    {
      "epoch": 0.0386568197553658,
      "grad_norm": 0.8546634912490845,
      "learning_rate": 2.5769230769230768e-05,
      "loss": 2.4146,
      "step": 670
    },
    {
      "epoch": 0.03923378721440111,
      "grad_norm": 0.4435669481754303,
      "learning_rate": 2.6153846153846157e-05,
      "loss": 2.4042,
      "step": 680
    },
    {
      "epoch": 0.03981075467343642,
      "grad_norm": 0.4790751039981842,
      "learning_rate": 2.6538461538461538e-05,
      "loss": 2.3906,
      "step": 690
    },
    {
      "epoch": 0.04038772213247173,
      "grad_norm": 0.40961435437202454,
      "learning_rate": 2.6923076923076923e-05,
      "loss": 2.4012,
      "step": 700
    },
    {
      "epoch": 0.040964689591507036,
      "grad_norm": 0.5555410385131836,
      "learning_rate": 2.7307692307692305e-05,
      "loss": 2.385,
      "step": 710
    },
    {
      "epoch": 0.04154165705054235,
      "grad_norm": 0.5821479558944702,
      "learning_rate": 2.7692307692307694e-05,
      "loss": 2.3953,
      "step": 720
    },
    {
      "epoch": 0.04211862450957766,
      "grad_norm": 0.40736421942710876,
      "learning_rate": 2.807692307692308e-05,
      "loss": 2.3962,
      "step": 730
    },
    {
      "epoch": 0.04269559196861297,
      "grad_norm": 0.4075765907764435,
      "learning_rate": 2.846153846153846e-05,
      "loss": 2.39,
      "step": 740
    },
    {
      "epoch": 0.04327255942764828,
      "grad_norm": 0.33844342827796936,
      "learning_rate": 2.8846153846153845e-05,
      "loss": 2.3841,
      "step": 750
    },
    {
      "epoch": 0.043849526886683594,
      "grad_norm": 0.40746697783470154,
      "learning_rate": 2.9230769230769234e-05,
      "loss": 2.4011,
      "step": 760
    },
    {
      "epoch": 0.0444264943457189,
      "grad_norm": 0.5998666882514954,
      "learning_rate": 2.9615384615384616e-05,
      "loss": 2.3927,
      "step": 770
    },
    {
      "epoch": 0.04500346180475421,
      "grad_norm": 0.875724196434021,
      "learning_rate": 3e-05,
      "loss": 2.3916,
      "step": 780
    },
    {
      "epoch": 0.045580429263789524,
      "grad_norm": 0.9473147392272949,
      "learning_rate": 3.0384615384615382e-05,
      "loss": 2.3865,
      "step": 790
    },
    {
      "epoch": 0.04615739672282483,
      "grad_norm": 0.6188512444496155,
      "learning_rate": 3.0769230769230774e-05,
      "loss": 2.3809,
      "step": 800
    },
    {
      "epoch": 0.046734364181860145,
      "grad_norm": 0.6592457890510559,
      "learning_rate": 3.115384615384615e-05,
      "loss": 2.3864,
      "step": 810
    },
    {
      "epoch": 0.04731133164089545,
      "grad_norm": 0.718359649181366,
      "learning_rate": 3.153846153846154e-05,
      "loss": 2.3697,
      "step": 820
    },
    {
      "epoch": 0.04788829909993077,
      "grad_norm": 0.6391853094100952,
      "learning_rate": 3.192307692307692e-05,
      "loss": 2.3903,
      "step": 830
    },
    {
      "epoch": 0.048465266558966075,
      "grad_norm": 0.6799693703651428,
      "learning_rate": 3.230769230769231e-05,
      "loss": 2.3955,
      "step": 840
    },
    {
      "epoch": 0.04904223401800138,
      "grad_norm": 0.47043943405151367,
      "learning_rate": 3.269230769230769e-05,
      "loss": 2.376,
      "step": 850
    },
    {
      "epoch": 0.0496192014770367,
      "grad_norm": 0.45935624837875366,
      "learning_rate": 3.307692307692308e-05,
      "loss": 2.3873,
      "step": 860
    },
    {
      "epoch": 0.050196168936072004,
      "grad_norm": 0.44479900598526,
      "learning_rate": 3.346153846153846e-05,
      "loss": 2.3721,
      "step": 870
    },
    {
      "epoch": 0.05077313639510732,
      "grad_norm": 0.37469252943992615,
      "learning_rate": 3.384615384615385e-05,
      "loss": 2.3748,
      "step": 880
    },
    {
      "epoch": 0.051350103854142626,
      "grad_norm": 0.5100722312927246,
      "learning_rate": 3.4230769230769234e-05,
      "loss": 2.3828,
      "step": 890
    },
    {
      "epoch": 0.05192707131317794,
      "grad_norm": 0.43965014815330505,
      "learning_rate": 3.461538461538462e-05,
      "loss": 2.3758,
      "step": 900
    },
    {
      "epoch": 0.05250403877221325,
      "grad_norm": 0.8576379418373108,
      "learning_rate": 3.5e-05,
      "loss": 2.3844,
      "step": 910
    },
    {
      "epoch": 0.053081006231248555,
      "grad_norm": 0.8042026162147522,
      "learning_rate": 3.538461538461539e-05,
      "loss": 2.3671,
      "step": 920
    },
    {
      "epoch": 0.05365797369028387,
      "grad_norm": 0.7367364764213562,
      "learning_rate": 3.5769230769230774e-05,
      "loss": 2.3879,
      "step": 930
    },
    {
      "epoch": 0.05423494114931918,
      "grad_norm": 0.7298218607902527,
      "learning_rate": 3.615384615384615e-05,
      "loss": 2.3766,
      "step": 940
    },
    {
      "epoch": 0.05481190860835449,
      "grad_norm": 0.39043882489204407,
      "learning_rate": 3.653846153846154e-05,
      "loss": 2.3736,
      "step": 950
    },
    {
      "epoch": 0.0553888760673898,
      "grad_norm": 0.6030943989753723,
      "learning_rate": 3.692307692307693e-05,
      "loss": 2.3579,
      "step": 960
    },
    {
      "epoch": 0.05596584352642511,
      "grad_norm": 0.6056037545204163,
      "learning_rate": 3.730769230769231e-05,
      "loss": 2.3579,
      "step": 970
    },
    {
      "epoch": 0.05654281098546042,
      "grad_norm": 0.37389466166496277,
      "learning_rate": 3.769230769230769e-05,
      "loss": 2.3746,
      "step": 980
    },
    {
      "epoch": 0.05711977844449573,
      "grad_norm": 0.3487268388271332,
      "learning_rate": 3.807692307692308e-05,
      "loss": 2.3633,
      "step": 990
    },
    {
      "epoch": 0.05769674590353104,
      "grad_norm": 0.4989362955093384,
      "learning_rate": 3.846153846153846e-05,
      "loss": 2.3639,
      "step": 1000
    },
    {
      "epoch": 0.05827371336256635,
      "grad_norm": 0.3485865890979767,
      "learning_rate": 3.884615384615385e-05,
      "loss": 2.3655,
      "step": 1010
    },
    {
      "epoch": 0.058850680821601664,
      "grad_norm": 1.3665262460708618,
      "learning_rate": 3.923076923076923e-05,
      "loss": 2.3559,
      "step": 1020
    },
    {
      "epoch": 0.05942764828063697,
      "grad_norm": 1.7110989093780518,
      "learning_rate": 3.961538461538462e-05,
      "loss": 2.372,
      "step": 1030
    },
    {
      "epoch": 0.06000461573967228,
      "grad_norm": 1.2965339422225952,
      "learning_rate": 4e-05,
      "loss": 2.3641,
      "step": 1040
    },
    {
      "epoch": 0.06058158319870759,
      "grad_norm": 1.0707265138626099,
      "learning_rate": 4.038461538461539e-05,
      "loss": 2.3682,
      "step": 1050
    },
    {
      "epoch": 0.0611585506577429,
      "grad_norm": 1.106713056564331,
      "learning_rate": 4.0769230769230773e-05,
      "loss": 2.3559,
      "step": 1060
    },
    {
      "epoch": 0.061735518116778215,
      "grad_norm": 2.2424588203430176,
      "learning_rate": 4.115384615384615e-05,
      "loss": 2.3661,
      "step": 1070
    },
    {
      "epoch": 0.06231248557581352,
      "grad_norm": 0.48183363676071167,
      "learning_rate": 4.1538461538461544e-05,
      "loss": 2.3442,
      "step": 1080
    },
    {
      "epoch": 0.06288945303484883,
      "grad_norm": 0.9450508952140808,
      "learning_rate": 4.192307692307693e-05,
      "loss": 2.3386,
      "step": 1090
    },
    {
      "epoch": 0.06346642049388415,
      "grad_norm": 1.067024827003479,
      "learning_rate": 4.230769230769231e-05,
      "loss": 2.3532,
      "step": 1100
    },
    {
      "epoch": 0.06404338795291946,
      "grad_norm": 0.9552332162857056,
      "learning_rate": 4.269230769230769e-05,
      "loss": 2.3392,
      "step": 1110
    },
    {
      "epoch": 0.06462035541195477,
      "grad_norm": 0.8050335645675659,
      "learning_rate": 4.3076923076923084e-05,
      "loss": 2.3371,
      "step": 1120
    },
    {
      "epoch": 0.06519732287099007,
      "grad_norm": 0.6774647235870361,
      "learning_rate": 4.346153846153846e-05,
      "loss": 2.3532,
      "step": 1130
    },
    {
      "epoch": 0.06577429033002538,
      "grad_norm": 0.4367026388645172,
      "learning_rate": 4.384615384615385e-05,
      "loss": 2.3487,
      "step": 1140
    },
    {
      "epoch": 0.0663512577890607,
      "grad_norm": 0.4666714072227478,
      "learning_rate": 4.423076923076923e-05,
      "loss": 2.3596,
      "step": 1150
    },
    {
      "epoch": 0.06692822524809601,
      "grad_norm": 0.4387041926383972,
      "learning_rate": 4.461538461538462e-05,
      "loss": 2.3468,
      "step": 1160
    },
    {
      "epoch": 0.06750519270713132,
      "grad_norm": 0.4057537913322449,
      "learning_rate": 4.5e-05,
      "loss": 2.323,
      "step": 1170
    },
    {
      "epoch": 0.06808216016616662,
      "grad_norm": 0.5073262453079224,
      "learning_rate": 4.538461538461539e-05,
      "loss": 2.3377,
      "step": 1180
    },
    {
      "epoch": 0.06865912762520193,
      "grad_norm": 0.40886059403419495,
      "learning_rate": 4.576923076923077e-05,
      "loss": 2.3369,
      "step": 1190
    },
    {
      "epoch": 0.06923609508423725,
      "grad_norm": 0.46122193336486816,
      "learning_rate": 4.615384615384616e-05,
      "loss": 2.3357,
      "step": 1200
    },
    {
      "epoch": 0.06981306254327256,
      "grad_norm": 0.7845937013626099,
      "learning_rate": 4.653846153846154e-05,
      "loss": 2.3496,
      "step": 1210
    },
    {
      "epoch": 0.07039003000230787,
      "grad_norm": 0.4420381188392639,
      "learning_rate": 4.692307692307693e-05,
      "loss": 2.3529,
      "step": 1220
    },
    {
      "epoch": 0.07096699746134318,
      "grad_norm": 0.4529377222061157,
      "learning_rate": 4.730769230769231e-05,
      "loss": 2.3197,
      "step": 1230
    },
    {
      "epoch": 0.0715439649203785,
      "grad_norm": 0.6386104822158813,
      "learning_rate": 4.76923076923077e-05,
      "loss": 2.3446,
      "step": 1240
    },
    {
      "epoch": 0.0721209323794138,
      "grad_norm": 0.7226986289024353,
      "learning_rate": 4.8076923076923084e-05,
      "loss": 2.3497,
      "step": 1250
    },
    {
      "epoch": 0.07269789983844911,
      "grad_norm": 1.063812017440796,
      "learning_rate": 4.846153846153846e-05,
      "loss": 2.3506,
      "step": 1260
    },
    {
      "epoch": 0.07327486729748442,
      "grad_norm": 0.511073112487793,
      "learning_rate": 4.884615384615385e-05,
      "loss": 2.3446,
      "step": 1270
    },
    {
      "epoch": 0.07385183475651973,
      "grad_norm": 0.3652757406234741,
      "learning_rate": 4.923076923076924e-05,
      "loss": 2.3282,
      "step": 1280
    },
    {
      "epoch": 0.07442880221555505,
      "grad_norm": 0.4461408853530884,
      "learning_rate": 4.961538461538462e-05,
      "loss": 2.3417,
      "step": 1290
    },
    {
      "epoch": 0.07500576967459036,
      "grad_norm": 0.47319433093070984,
      "learning_rate": 5e-05,
      "loss": 2.3418,
      "step": 1300
    },
    {
      "epoch": 0.07558273713362566,
      "grad_norm": 0.4424804151058197,
      "learning_rate": 5.038461538461539e-05,
      "loss": 2.3349,
      "step": 1310
    },
    {
      "epoch": 0.07615970459266097,
      "grad_norm": 0.509090781211853,
      "learning_rate": 5.0769230769230766e-05,
      "loss": 2.3236,
      "step": 1320
    },
    {
      "epoch": 0.07673667205169628,
      "grad_norm": 1.1174790859222412,
      "learning_rate": 5.115384615384615e-05,
      "loss": 2.3317,
      "step": 1330
    },
    {
      "epoch": 0.0773136395107316,
      "grad_norm": 0.4461131989955902,
      "learning_rate": 5.1538461538461536e-05,
      "loss": 2.325,
      "step": 1340
    },
    {
      "epoch": 0.0778906069697669,
      "grad_norm": 0.40025824308395386,
      "learning_rate": 5.192307692307693e-05,
      "loss": 2.3391,
      "step": 1350
    },
    {
      "epoch": 0.07846757442880221,
      "grad_norm": 0.5350430011749268,
      "learning_rate": 5.230769230769231e-05,
      "loss": 2.3427,
      "step": 1360
    },
    {
      "epoch": 0.07904454188783752,
      "grad_norm": 0.4814503788948059,
      "learning_rate": 5.26923076923077e-05,
      "loss": 2.3374,
      "step": 1370
    },
    {
      "epoch": 0.07962150934687284,
      "grad_norm": 0.8118190765380859,
      "learning_rate": 5.3076923076923076e-05,
      "loss": 2.3342,
      "step": 1380
    },
    {
      "epoch": 0.08019847680590815,
      "grad_norm": 0.8076496720314026,
      "learning_rate": 5.346153846153846e-05,
      "loss": 2.3348,
      "step": 1390
    },
    {
      "epoch": 0.08077544426494346,
      "grad_norm": 0.43335264921188354,
      "learning_rate": 5.384615384615385e-05,
      "loss": 2.3369,
      "step": 1400
    },
    {
      "epoch": 0.08135241172397877,
      "grad_norm": 0.704337477684021,
      "learning_rate": 5.423076923076923e-05,
      "loss": 2.3227,
      "step": 1410
    },
    {
      "epoch": 0.08192937918301407,
      "grad_norm": 0.5695681571960449,
      "learning_rate": 5.461538461538461e-05,
      "loss": 2.3209,
      "step": 1420
    },
    {
      "epoch": 0.0825063466420494,
      "grad_norm": 0.5841794610023499,
      "learning_rate": 5.500000000000001e-05,
      "loss": 2.3135,
      "step": 1430
    },
    {
      "epoch": 0.0830833141010847,
      "grad_norm": 0.5450507402420044,
      "learning_rate": 5.538461538461539e-05,
      "loss": 2.322,
      "step": 1440
    },
    {
      "epoch": 0.08366028156012001,
      "grad_norm": 0.5215712785720825,
      "learning_rate": 5.576923076923077e-05,
      "loss": 2.3356,
      "step": 1450
    },
    {
      "epoch": 0.08423724901915532,
      "grad_norm": 0.5433002710342407,
      "learning_rate": 5.615384615384616e-05,
      "loss": 2.3187,
      "step": 1460
    },
    {
      "epoch": 0.08481421647819062,
      "grad_norm": 0.4980168342590332,
      "learning_rate": 5.653846153846154e-05,
      "loss": 2.3164,
      "step": 1470
    },
    {
      "epoch": 0.08539118393722595,
      "grad_norm": 1.0669074058532715,
      "learning_rate": 5.692307692307692e-05,
      "loss": 2.3226,
      "step": 1480
    },
    {
      "epoch": 0.08596815139626125,
      "grad_norm": 0.569298505783081,
      "learning_rate": 5.7307692307692306e-05,
      "loss": 2.3098,
      "step": 1490
    },
    {
      "epoch": 0.08654511885529656,
      "grad_norm": 0.7760196328163147,
      "learning_rate": 5.769230769230769e-05,
      "loss": 2.3265,
      "step": 1500
    },
    {
      "epoch": 0.08712208631433187,
      "grad_norm": 0.5390911102294922,
      "learning_rate": 5.807692307692308e-05,
      "loss": 2.3122,
      "step": 1510
    },
    {
      "epoch": 0.08769905377336719,
      "grad_norm": 0.8864639401435852,
      "learning_rate": 5.846153846153847e-05,
      "loss": 2.3155,
      "step": 1520
    },
    {
      "epoch": 0.0882760212324025,
      "grad_norm": 0.4188685715198517,
      "learning_rate": 5.884615384615385e-05,
      "loss": 2.3087,
      "step": 1530
    },
    {
      "epoch": 0.0888529886914378,
      "grad_norm": 0.5072064399719238,
      "learning_rate": 5.923076923076923e-05,
      "loss": 2.3091,
      "step": 1540
    },
    {
      "epoch": 0.08942995615047311,
      "grad_norm": 0.7691831588745117,
      "learning_rate": 5.9615384615384616e-05,
      "loss": 2.3165,
      "step": 1550
    },
    {
      "epoch": 0.09000692360950842,
      "grad_norm": 0.3790508806705475,
      "learning_rate": 6e-05,
      "loss": 2.3162,
      "step": 1560
    },
    {
      "epoch": 0.09058389106854374,
      "grad_norm": 0.9513830542564392,
      "learning_rate": 6.038461538461539e-05,
      "loss": 2.3184,
      "step": 1570
    },
    {
      "epoch": 0.09116085852757905,
      "grad_norm": 0.6464409828186035,
      "learning_rate": 6.0769230769230765e-05,
      "loss": 2.3297,
      "step": 1580
    },
    {
      "epoch": 0.09173782598661435,
      "grad_norm": 0.5319147109985352,
      "learning_rate": 6.115384615384616e-05,
      "loss": 2.2935,
      "step": 1590
    },
    {
      "epoch": 0.09231479344564966,
      "grad_norm": 0.9692733883857727,
      "learning_rate": 6.153846153846155e-05,
      "loss": 2.3077,
      "step": 1600
    },
    {
      "epoch": 0.09289176090468497,
      "grad_norm": 0.448768675327301,
      "learning_rate": 6.192307692307693e-05,
      "loss": 2.3078,
      "step": 1610
    },
    {
      "epoch": 0.09346872836372029,
      "grad_norm": 0.6407192945480347,
      "learning_rate": 6.23076923076923e-05,
      "loss": 2.3069,
      "step": 1620
    },
    {
      "epoch": 0.0940456958227556,
      "grad_norm": 0.6339554786682129,
      "learning_rate": 6.26923076923077e-05,
      "loss": 2.31,
      "step": 1630
    },
    {
      "epoch": 0.0946226632817909,
      "grad_norm": 0.634101152420044,
      "learning_rate": 6.307692307692308e-05,
      "loss": 2.3068,
      "step": 1640
    },
    {
      "epoch": 0.09519963074082621,
      "grad_norm": 0.6417617797851562,
      "learning_rate": 6.346153846153847e-05,
      "loss": 2.297,
      "step": 1650
    },
    {
      "epoch": 0.09577659819986153,
      "grad_norm": 0.5501047968864441,
      "learning_rate": 6.384615384615385e-05,
      "loss": 2.2972,
      "step": 1660
    },
    {
      "epoch": 0.09635356565889684,
      "grad_norm": 1.1125547885894775,
      "learning_rate": 6.423076923076924e-05,
      "loss": 2.3142,
      "step": 1670
    },
    {
      "epoch": 0.09693053311793215,
      "grad_norm": 1.900116205215454,
      "learning_rate": 6.461538461538462e-05,
      "loss": 2.2999,
      "step": 1680
    },
    {
      "epoch": 0.09750750057696746,
      "grad_norm": 1.4570536613464355,
      "learning_rate": 6.500000000000001e-05,
      "loss": 2.2935,
      "step": 1690
    },
    {
      "epoch": 0.09808446803600276,
      "grad_norm": 1.8386913537979126,
      "learning_rate": 6.538461538461539e-05,
      "loss": 2.2942,
      "step": 1700
    },
    {
      "epoch": 0.09866143549503809,
      "grad_norm": 1.289682388305664,
      "learning_rate": 6.576923076923078e-05,
      "loss": 2.306,
      "step": 1710
    },
    {
      "epoch": 0.0992384029540734,
      "grad_norm": 1.354390025138855,
      "learning_rate": 6.615384615384616e-05,
      "loss": 2.2973,
      "step": 1720
    },
    {
      "epoch": 0.0998153704131087,
      "grad_norm": 1.333463430404663,
      "learning_rate": 6.653846153846153e-05,
      "loss": 2.2863,
      "step": 1730
    },
    {
      "epoch": 0.10039233787214401,
      "grad_norm": 1.2394797801971436,
      "learning_rate": 6.692307692307693e-05,
      "loss": 2.2857,
      "step": 1740
    },
    {
      "epoch": 0.10096930533117932,
      "grad_norm": 1.4308770895004272,
      "learning_rate": 6.730769230769232e-05,
      "loss": 2.2901,
      "step": 1750
    },
    {
      "epoch": 0.10154627279021464,
      "grad_norm": 1.2967873811721802,
      "learning_rate": 6.76923076923077e-05,
      "loss": 2.2997,
      "step": 1760
    },
    {
      "epoch": 0.10212324024924994,
      "grad_norm": 1.192817211151123,
      "learning_rate": 6.807692307692309e-05,
      "loss": 2.2841,
      "step": 1770
    },
    {
      "epoch": 0.10270020770828525,
      "grad_norm": 1.3187321424484253,
      "learning_rate": 6.846153846153847e-05,
      "loss": 2.2893,
      "step": 1780
    },
    {
      "epoch": 0.10327717516732056,
      "grad_norm": 1.267285943031311,
      "learning_rate": 6.884615384615385e-05,
      "loss": 2.2901,
      "step": 1790
    },
    {
      "epoch": 0.10385414262635588,
      "grad_norm": 1.3133481740951538,
      "learning_rate": 6.923076923076924e-05,
      "loss": 2.2961,
      "step": 1800
    },
    {
      "epoch": 0.10443111008539119,
      "grad_norm": 1.097119927406311,
      "learning_rate": 6.961538461538462e-05,
      "loss": 2.301,
      "step": 1810
    },
    {
      "epoch": 0.1050080775444265,
      "grad_norm": 1.2084568738937378,
      "learning_rate": 7e-05,
      "loss": 2.2936,
      "step": 1820
    },
    {
      "epoch": 0.1055850450034618,
      "grad_norm": 1.1895954608917236,
      "learning_rate": 7.03846153846154e-05,
      "loss": 2.2985,
      "step": 1830
    },
    {
      "epoch": 0.10616201246249711,
      "grad_norm": 0.8945775628089905,
      "learning_rate": 7.076923076923078e-05,
      "loss": 2.2972,
      "step": 1840
    },
    {
      "epoch": 0.10673897992153243,
      "grad_norm": 0.39187899231910706,
      "learning_rate": 7.115384615384616e-05,
      "loss": 2.2916,
      "step": 1850
    },
    {
      "epoch": 0.10731594738056774,
      "grad_norm": 0.9170978665351868,
      "learning_rate": 7.153846153846155e-05,
      "loss": 2.2832,
      "step": 1860
    },
    {
      "epoch": 0.10789291483960305,
      "grad_norm": 0.37489083409309387,
      "learning_rate": 7.192307692307693e-05,
      "loss": 2.2882,
      "step": 1870
    },
    {
      "epoch": 0.10846988229863835,
      "grad_norm": 0.3608030080795288,
      "learning_rate": 7.23076923076923e-05,
      "loss": 2.2875,
      "step": 1880
    },
    {
      "epoch": 0.10904684975767366,
      "grad_norm": 0.3541902005672455,
      "learning_rate": 7.26923076923077e-05,
      "loss": 2.2817,
      "step": 1890
    },
    {
      "epoch": 0.10962381721670898,
      "grad_norm": 0.42174556851387024,
      "learning_rate": 7.307692307692307e-05,
      "loss": 2.2812,
      "step": 1900
    },
    {
      "epoch": 0.11020078467574429,
      "grad_norm": 0.3870134651660919,
      "learning_rate": 7.346153846153847e-05,
      "loss": 2.291,
      "step": 1910
    },
    {
      "epoch": 0.1107777521347796,
      "grad_norm": 0.5459268689155579,
      "learning_rate": 7.384615384615386e-05,
      "loss": 2.2766,
      "step": 1920
    },
    {
      "epoch": 0.1113547195938149,
      "grad_norm": 0.4281494915485382,
      "learning_rate": 7.423076923076924e-05,
      "loss": 2.2809,
      "step": 1930
    },
    {
      "epoch": 0.11193168705285023,
      "grad_norm": 0.5660754442214966,
      "learning_rate": 7.461538461538462e-05,
      "loss": 2.2832,
      "step": 1940
    },
    {
      "epoch": 0.11250865451188553,
      "grad_norm": 0.3824213445186615,
      "learning_rate": 7.500000000000001e-05,
      "loss": 2.2833,
      "step": 1950
    },
    {
      "epoch": 0.11308562197092084,
      "grad_norm": 0.39723706245422363,
      "learning_rate": 7.538461538461539e-05,
      "loss": 2.2826,
      "step": 1960
    },
    {
      "epoch": 0.11366258942995615,
      "grad_norm": 0.3764449656009674,
      "learning_rate": 7.576923076923076e-05,
      "loss": 2.2773,
      "step": 1970
    },
    {
      "epoch": 0.11423955688899146,
      "grad_norm": 0.4552217423915863,
      "learning_rate": 7.615384615384616e-05,
      "loss": 2.2741,
      "step": 1980
    },
    {
      "epoch": 0.11481652434802678,
      "grad_norm": 0.37920665740966797,
      "learning_rate": 7.653846153846153e-05,
      "loss": 2.2841,
      "step": 1990
    },
    {
      "epoch": 0.11539349180706208,
      "grad_norm": 0.5208818316459656,
      "learning_rate": 7.692307692307693e-05,
      "loss": 2.2713,
      "step": 2000
    },
    {
      "epoch": 0.11597045926609739,
      "grad_norm": 0.40460675954818726,
      "learning_rate": 7.730769230769232e-05,
      "loss": 2.2902,
      "step": 2010
    },
    {
      "epoch": 0.1165474267251327,
      "grad_norm": 0.41813430190086365,
      "learning_rate": 7.76923076923077e-05,
      "loss": 2.2752,
      "step": 2020
    },
    {
      "epoch": 0.11712439418416801,
      "grad_norm": 0.45470941066741943,
      "learning_rate": 7.807692307692307e-05,
      "loss": 2.2829,
      "step": 2030
    },
    {
      "epoch": 0.11770136164320333,
      "grad_norm": 0.5748339891433716,
      "learning_rate": 7.846153846153847e-05,
      "loss": 2.2865,
      "step": 2040
    },
    {
      "epoch": 0.11827832910223864,
      "grad_norm": 0.6586531400680542,
      "learning_rate": 7.884615384615384e-05,
      "loss": 2.2764,
      "step": 2050
    },
    {
      "epoch": 0.11885529656127394,
      "grad_norm": 0.47713714838027954,
      "learning_rate": 7.923076923076924e-05,
      "loss": 2.2763,
      "step": 2060
    },
    {
      "epoch": 0.11943226402030925,
      "grad_norm": 0.5001585483551025,
      "learning_rate": 7.961538461538461e-05,
      "loss": 2.2637,
      "step": 2070
    },
    {
      "epoch": 0.12000923147934456,
      "grad_norm": 0.49730342626571655,
      "learning_rate": 8e-05,
      "loss": 2.2697,
      "step": 2080
    },
    {
      "epoch": 0.12058619893837988,
      "grad_norm": 0.7335867285728455,
      "learning_rate": 8.038461538461538e-05,
      "loss": 2.2798,
      "step": 2090
    },
    {
      "epoch": 0.12116316639741519,
      "grad_norm": 0.7102777361869812,
      "learning_rate": 8.076923076923078e-05,
      "loss": 2.2659,
      "step": 2100
    },
    {
      "epoch": 0.1217401338564505,
      "grad_norm": 0.5046675801277161,
      "learning_rate": 8.115384615384616e-05,
      "loss": 2.2692,
      "step": 2110
    },
    {
      "epoch": 0.1223171013154858,
      "grad_norm": 0.48471033573150635,
      "learning_rate": 8.153846153846155e-05,
      "loss": 2.2664,
      "step": 2120
    },
    {
      "epoch": 0.12289406877452112,
      "grad_norm": 0.5488987565040588,
      "learning_rate": 8.192307692307693e-05,
      "loss": 2.2862,
      "step": 2130
    },
    {
      "epoch": 0.12347103623355643,
      "grad_norm": 0.5509325265884399,
      "learning_rate": 8.23076923076923e-05,
      "loss": 2.2736,
      "step": 2140
    },
    {
      "epoch": 0.12404800369259174,
      "grad_norm": 0.47769248485565186,
      "learning_rate": 8.26923076923077e-05,
      "loss": 2.259,
      "step": 2150
    },
    {
      "epoch": 0.12462497115162705,
      "grad_norm": 0.5516425371170044,
      "learning_rate": 8.307692307692309e-05,
      "loss": 2.2642,
      "step": 2160
    },
    {
      "epoch": 0.12520193861066237,
      "grad_norm": 0.3504526615142822,
      "learning_rate": 8.346153846153847e-05,
      "loss": 2.2652,
      "step": 2170
    },
    {
      "epoch": 0.12577890606969766,
      "grad_norm": 0.720689058303833,
      "learning_rate": 8.384615384615386e-05,
      "loss": 2.2765,
      "step": 2180
    },
    {
      "epoch": 0.12635587352873298,
      "grad_norm": 0.7643449306488037,
      "learning_rate": 8.423076923076924e-05,
      "loss": 2.27,
      "step": 2190
    },
    {
      "epoch": 0.1269328409877683,
      "grad_norm": 0.47102758288383484,
      "learning_rate": 8.461538461538461e-05,
      "loss": 2.2595,
      "step": 2200
    },
    {
      "epoch": 0.1275098084468036,
      "grad_norm": 0.416532427072525,
      "learning_rate": 8.5e-05,
      "loss": 2.2669,
      "step": 2210
    },
    {
      "epoch": 0.12808677590583892,
      "grad_norm": 0.37624692916870117,
      "learning_rate": 8.538461538461538e-05,
      "loss": 2.2748,
      "step": 2220
    },
    {
      "epoch": 0.1286637433648742,
      "grad_norm": 0.4788731634616852,
      "learning_rate": 8.576923076923076e-05,
      "loss": 2.2701,
      "step": 2230
    },
    {
      "epoch": 0.12924071082390953,
      "grad_norm": 0.34961792826652527,
      "learning_rate": 8.615384615384617e-05,
      "loss": 2.2658,
      "step": 2240
    },
    {
      "epoch": 0.12981767828294485,
      "grad_norm": 0.5307486057281494,
      "learning_rate": 8.653846153846155e-05,
      "loss": 2.2651,
      "step": 2250
    },
    {
      "epoch": 0.13039464574198015,
      "grad_norm": 0.6055422425270081,
      "learning_rate": 8.692307692307692e-05,
      "loss": 2.2625,
      "step": 2260
    },
    {
      "epoch": 0.13097161320101547,
      "grad_norm": 0.5680871605873108,
      "learning_rate": 8.730769230769232e-05,
      "loss": 2.256,
      "step": 2270
    },
    {
      "epoch": 0.13154858066005076,
      "grad_norm": 1.0871108770370483,
      "learning_rate": 8.76923076923077e-05,
      "loss": 2.2568,
      "step": 2280
    },
    {
      "epoch": 0.13212554811908608,
      "grad_norm": 0.5224435329437256,
      "learning_rate": 8.807692307692307e-05,
      "loss": 2.2741,
      "step": 2290
    },
    {
      "epoch": 0.1327025155781214,
      "grad_norm": 0.587762713432312,
      "learning_rate": 8.846153846153847e-05,
      "loss": 2.2708,
      "step": 2300
    },
    {
      "epoch": 0.1332794830371567,
      "grad_norm": 0.6314758062362671,
      "learning_rate": 8.884615384615384e-05,
      "loss": 2.2716,
      "step": 2310
    },
    {
      "epoch": 0.13385645049619202,
      "grad_norm": 0.4869266748428345,
      "learning_rate": 8.923076923076924e-05,
      "loss": 2.2511,
      "step": 2320
    },
    {
      "epoch": 0.1344334179552273,
      "grad_norm": 0.4189651608467102,
      "learning_rate": 8.961538461538463e-05,
      "loss": 2.2634,
      "step": 2330
    },
    {
      "epoch": 0.13501038541426263,
      "grad_norm": 0.6171414852142334,
      "learning_rate": 9e-05,
      "loss": 2.2448,
      "step": 2340
    },
    {
      "epoch": 0.13558735287329796,
      "grad_norm": 0.8491079807281494,
      "learning_rate": 9.038461538461538e-05,
      "loss": 2.2592,
      "step": 2350
    },
    {
      "epoch": 0.13616432033233325,
      "grad_norm": 0.32807987928390503,
      "learning_rate": 9.076923076923078e-05,
      "loss": 2.2538,
      "step": 2360
    },
    {
      "epoch": 0.13674128779136857,
      "grad_norm": 0.3959665894508362,
      "learning_rate": 9.115384615384615e-05,
      "loss": 2.2574,
      "step": 2370
    },
    {
      "epoch": 0.13731825525040386,
      "grad_norm": 0.8039582967758179,
      "learning_rate": 9.153846153846155e-05,
      "loss": 2.2581,
      "step": 2380
    },
    {
      "epoch": 0.13789522270943919,
      "grad_norm": 0.6241025924682617,
      "learning_rate": 9.192307692307692e-05,
      "loss": 2.2503,
      "step": 2390
    },
    {
      "epoch": 0.1384721901684745,
      "grad_norm": 0.678353488445282,
      "learning_rate": 9.230769230769232e-05,
      "loss": 2.2723,
      "step": 2400
    },
    {
      "epoch": 0.1390491576275098,
      "grad_norm": 0.9290316104888916,
      "learning_rate": 9.26923076923077e-05,
      "loss": 2.2542,
      "step": 2410
    },
    {
      "epoch": 0.13962612508654512,
      "grad_norm": 0.3607237935066223,
      "learning_rate": 9.307692307692309e-05,
      "loss": 2.2639,
      "step": 2420
    },
    {
      "epoch": 0.14020309254558042,
      "grad_norm": 0.47618988156318665,
      "learning_rate": 9.346153846153846e-05,
      "loss": 2.2493,
      "step": 2430
    },
    {
      "epoch": 0.14078006000461574,
      "grad_norm": 0.40734684467315674,
      "learning_rate": 9.384615384615386e-05,
      "loss": 2.2563,
      "step": 2440
    },
    {
      "epoch": 0.14135702746365106,
      "grad_norm": 0.3996923267841339,
      "learning_rate": 9.423076923076924e-05,
      "loss": 2.2489,
      "step": 2450
    },
    {
      "epoch": 0.14193399492268635,
      "grad_norm": 0.509465754032135,
      "learning_rate": 9.461538461538461e-05,
      "loss": 2.2477,
      "step": 2460
    },
    {
      "epoch": 0.14251096238172167,
      "grad_norm": 0.3685857653617859,
      "learning_rate": 9.5e-05,
      "loss": 2.2667,
      "step": 2470
    },
    {
      "epoch": 0.143087929840757,
      "grad_norm": 0.5704885125160217,
      "learning_rate": 9.53846153846154e-05,
      "loss": 2.2584,
      "step": 2480
    },
    {
      "epoch": 0.1436648972997923,
      "grad_norm": 0.5545893907546997,
      "learning_rate": 9.576923076923078e-05,
      "loss": 2.2614,
      "step": 2490
    },
    {
      "epoch": 0.1442418647588276,
      "grad_norm": 0.4671754539012909,
      "learning_rate": 9.615384615384617e-05,
      "loss": 2.2643,
      "step": 2500
    },
    {
      "epoch": 0.1448188322178629,
      "grad_norm": 0.4108332097530365,
      "learning_rate": 9.653846153846155e-05,
      "loss": 2.2519,
      "step": 2510
    },
    {
      "epoch": 0.14539579967689822,
      "grad_norm": 0.6676947474479675,
      "learning_rate": 9.692307692307692e-05,
      "loss": 2.2634,
      "step": 2520
    },
    {
      "epoch": 0.14597276713593355,
      "grad_norm": 0.7467900514602661,
      "learning_rate": 9.730769230769232e-05,
      "loss": 2.2526,
      "step": 2530
    },
    {
      "epoch": 0.14654973459496884,
      "grad_norm": 0.7136681079864502,
      "learning_rate": 9.76923076923077e-05,
      "loss": 2.2539,
      "step": 2540
    },
    {
      "epoch": 0.14712670205400416,
      "grad_norm": 0.4307969808578491,
      "learning_rate": 9.807692307692307e-05,
      "loss": 2.2483,
      "step": 2550
    },
    {
      "epoch": 0.14770366951303945,
      "grad_norm": 0.5553449392318726,
      "learning_rate": 9.846153846153848e-05,
      "loss": 2.2388,
      "step": 2560
    },
    {
      "epoch": 0.14828063697207478,
      "grad_norm": 0.8331669569015503,
      "learning_rate": 9.884615384615386e-05,
      "loss": 2.2571,
      "step": 2570
    },
    {
      "epoch": 0.1488576044311101,
      "grad_norm": 0.437548965215683,
      "learning_rate": 9.923076923076923e-05,
      "loss": 2.2604,
      "step": 2580
    },
    {
      "epoch": 0.1494345718901454,
      "grad_norm": 0.42178213596343994,
      "learning_rate": 9.961538461538463e-05,
      "loss": 2.2477,
      "step": 2590
    },
    {
      "epoch": 0.1500115393491807,
      "grad_norm": 0.3375515341758728,
      "learning_rate": 0.0001,
      "loss": 2.2502,
      "step": 2600
    },
    {
      "epoch": 0.150588506808216,
      "grad_norm": 0.42404118180274963,
      "learning_rate": 9.999988631159617e-05,
      "loss": 2.2535,
      "step": 2610
    },
    {
      "epoch": 0.15116547426725133,
      "grad_norm": 0.7891207933425903,
      "learning_rate": 9.999954524690165e-05,
      "loss": 2.2498,
      "step": 2620
    },
    {
      "epoch": 0.15174244172628665,
      "grad_norm": 1.1495307683944702,
      "learning_rate": 9.999897680746749e-05,
      "loss": 2.2489,
      "step": 2630
    },
    {
      "epoch": 0.15231940918532194,
      "grad_norm": 1.0135457515716553,
      "learning_rate": 9.999818099587866e-05,
      "loss": 2.2668,
      "step": 2640
    },
    {
      "epoch": 0.15289637664435726,
      "grad_norm": 0.676613450050354,
      "learning_rate": 9.999715781575412e-05,
      "loss": 2.2425,
      "step": 2650
    },
    {
      "epoch": 0.15347334410339256,
      "grad_norm": 0.4525866210460663,
      "learning_rate": 9.999590727174686e-05,
      "loss": 2.23,
      "step": 2660
    },
    {
      "epoch": 0.15405031156242788,
      "grad_norm": 0.35913950204849243,
      "learning_rate": 9.999442936954375e-05,
      "loss": 2.2487,
      "step": 2670
    },
    {
      "epoch": 0.1546272790214632,
      "grad_norm": 0.5417553186416626,
      "learning_rate": 9.999272411586562e-05,
      "loss": 2.2526,
      "step": 2680
    },
    {
      "epoch": 0.1552042464804985,
      "grad_norm": 0.3553147315979004,
      "learning_rate": 9.999079151846715e-05,
      "loss": 2.2419,
      "step": 2690
    },
    {
      "epoch": 0.1557812139395338,
      "grad_norm": 0.4257926642894745,
      "learning_rate": 9.998863158613692e-05,
      "loss": 2.2468,
      "step": 2700
    },
    {
      "epoch": 0.1563581813985691,
      "grad_norm": 0.3629404306411743,
      "learning_rate": 9.99862443286973e-05,
      "loss": 2.2514,
      "step": 2710
    },
    {
      "epoch": 0.15693514885760443,
      "grad_norm": 0.3703051805496216,
      "learning_rate": 9.998362975700442e-05,
      "loss": 2.2419,
      "step": 2720
    },
    {
      "epoch": 0.15751211631663975,
      "grad_norm": 0.5970242619514465,
      "learning_rate": 9.998078788294811e-05,
      "loss": 2.2395,
      "step": 2730
    },
    {
      "epoch": 0.15808908377567504,
      "grad_norm": 0.5654362440109253,
      "learning_rate": 9.997771871945195e-05,
      "loss": 2.2408,
      "step": 2740
    },
    {
      "epoch": 0.15866605123471036,
      "grad_norm": 0.3666626811027527,
      "learning_rate": 9.997442228047304e-05,
      "loss": 2.231,
      "step": 2750
    },
    {
      "epoch": 0.15924301869374569,
      "grad_norm": 0.5000890493392944,
      "learning_rate": 9.997089858100205e-05,
      "loss": 2.2351,
      "step": 2760
    },
    {
      "epoch": 0.15981998615278098,
      "grad_norm": 0.35561060905456543,
      "learning_rate": 9.996714763706318e-05,
      "loss": 2.2401,
      "step": 2770
    },
    {
      "epoch": 0.1603969536118163,
      "grad_norm": 0.803989589214325,
      "learning_rate": 9.996316946571391e-05,
      "loss": 2.2356,
      "step": 2780
    },
    {
      "epoch": 0.1609739210708516,
      "grad_norm": 0.7431883215904236,
      "learning_rate": 9.995896408504518e-05,
      "loss": 2.2353,
      "step": 2790
    },
    {
      "epoch": 0.16155088852988692,
      "grad_norm": 0.3578578233718872,
      "learning_rate": 9.995453151418106e-05,
      "loss": 2.2419,
      "step": 2800
    },
    {
      "epoch": 0.16212785598892224,
      "grad_norm": 0.3431973159313202,
      "learning_rate": 9.994987177327885e-05,
      "loss": 2.2316,
      "step": 2810
    },
    {
      "epoch": 0.16270482344795753,
      "grad_norm": 0.3207797706127167,
      "learning_rate": 9.99449848835289e-05,
      "loss": 2.2381,
      "step": 2820
    },
    {
      "epoch": 0.16328179090699285,
      "grad_norm": 0.5509916543960571,
      "learning_rate": 9.99398708671545e-05,
      "loss": 2.2297,
      "step": 2830
    },
    {
      "epoch": 0.16385875836602815,
      "grad_norm": 0.3692900538444519,
      "learning_rate": 9.993452974741185e-05,
      "loss": 2.2148,
      "step": 2840
    },
    {
      "epoch": 0.16443572582506347,
      "grad_norm": 0.9321035146713257,
      "learning_rate": 9.992896154858987e-05,
      "loss": 2.2323,
      "step": 2850
    },
    {
      "epoch": 0.1650126932840988,
      "grad_norm": 0.5195390582084656,
      "learning_rate": 9.992316629601012e-05,
      "loss": 2.2435,
      "step": 2860
    },
    {
      "epoch": 0.16558966074313408,
      "grad_norm": 0.407969206571579,
      "learning_rate": 9.991714401602676e-05,
      "loss": 2.2384,
      "step": 2870
    },
    {
      "epoch": 0.1661666282021694,
      "grad_norm": 0.3711942136287689,
      "learning_rate": 9.991089473602631e-05,
      "loss": 2.2422,
      "step": 2880
    },
    {
      "epoch": 0.1667435956612047,
      "grad_norm": 0.3882133364677429,
      "learning_rate": 9.99044184844276e-05,
      "loss": 2.2175,
      "step": 2890
    },
    {
      "epoch": 0.16732056312024002,
      "grad_norm": 0.6136539578437805,
      "learning_rate": 9.989771529068162e-05,
      "loss": 2.2226,
      "step": 2900
    },
    {
      "epoch": 0.16789753057927534,
      "grad_norm": 0.48447325825691223,
      "learning_rate": 9.989078518527137e-05,
      "loss": 2.2111,
      "step": 2910
    },
    {
      "epoch": 0.16847449803831063,
      "grad_norm": 0.47318318486213684,
      "learning_rate": 9.988362819971175e-05,
      "loss": 2.2225,
      "step": 2920
    },
    {
      "epoch": 0.16905146549734595,
      "grad_norm": 0.5245835185050964,
      "learning_rate": 9.987624436654946e-05,
      "loss": 2.2251,
      "step": 2930
    },
    {
      "epoch": 0.16962843295638125,
      "grad_norm": 0.3767394721508026,
      "learning_rate": 9.98686337193627e-05,
      "loss": 2.2276,
      "step": 2940
    },
    {
      "epoch": 0.17020540041541657,
      "grad_norm": 0.5148694515228271,
      "learning_rate": 9.986079629276118e-05,
      "loss": 2.2154,
      "step": 2950
    },
    {
      "epoch": 0.1707823678744519,
      "grad_norm": 0.7400733232498169,
      "learning_rate": 9.985273212238586e-05,
      "loss": 2.2322,
      "step": 2960
    },
    {
      "epoch": 0.17135933533348718,
      "grad_norm": 0.5233339071273804,
      "learning_rate": 9.984444124490888e-05,
      "loss": 2.2257,
      "step": 2970
    },
    {
      "epoch": 0.1719363027925225,
      "grad_norm": 0.3448404371738434,
      "learning_rate": 9.98359236980333e-05,
      "loss": 2.2264,
      "step": 2980
    },
    {
      "epoch": 0.1725132702515578,
      "grad_norm": 0.38899293541908264,
      "learning_rate": 9.982717952049296e-05,
      "loss": 2.2255,
      "step": 2990
    },
    {
      "epoch": 0.17309023771059312,
      "grad_norm": 0.38548049330711365,
      "learning_rate": 9.98182087520523e-05,
      "loss": 2.2258,
      "step": 3000
    },
    {
      "epoch": 0.17366720516962844,
      "grad_norm": 0.4106454849243164,
      "learning_rate": 9.980901143350627e-05,
      "loss": 2.2252,
      "step": 3010
    },
    {
      "epoch": 0.17424417262866373,
      "grad_norm": 0.31857094168663025,
      "learning_rate": 9.979958760667997e-05,
      "loss": 2.2277,
      "step": 3020
    },
    {
      "epoch": 0.17482114008769906,
      "grad_norm": 0.41078636050224304,
      "learning_rate": 9.97899373144286e-05,
      "loss": 2.2281,
      "step": 3030
    },
    {
      "epoch": 0.17539810754673438,
      "grad_norm": 0.411409467458725,
      "learning_rate": 9.978006060063723e-05,
      "loss": 2.2331,
      "step": 3040
    },
    {
      "epoch": 0.17597507500576967,
      "grad_norm": 0.9199445247650146,
      "learning_rate": 9.976995751022054e-05,
      "loss": 2.2305,
      "step": 3050
    },
    {
      "epoch": 0.176552042464805,
      "grad_norm": 0.5076198577880859,
      "learning_rate": 9.975962808912271e-05,
      "loss": 2.2105,
      "step": 3060
    },
    {
      "epoch": 0.17712900992384029,
      "grad_norm": 0.5077535510063171,
      "learning_rate": 9.974907238431716e-05,
      "loss": 2.2286,
      "step": 3070
    },
    {
      "epoch": 0.1777059773828756,
      "grad_norm": 0.398752897977829,
      "learning_rate": 9.973829044380637e-05,
      "loss": 2.2134,
      "step": 3080
    },
    {
      "epoch": 0.17828294484191093,
      "grad_norm": 0.32480180263519287,
      "learning_rate": 9.972728231662157e-05,
      "loss": 2.2225,
      "step": 3090
    },
    {
      "epoch": 0.17885991230094622,
      "grad_norm": 0.49645814299583435,
      "learning_rate": 9.971604805282259e-05,
      "loss": 2.2179,
      "step": 3100
    },
    {
      "epoch": 0.17943687975998154,
      "grad_norm": 0.6805341243743896,
      "learning_rate": 9.97045877034977e-05,
      "loss": 2.2093,
      "step": 3110
    },
    {
      "epoch": 0.18001384721901684,
      "grad_norm": 0.3878099024295807,
      "learning_rate": 9.969290132076326e-05,
      "loss": 2.2088,
      "step": 3120
    },
    {
      "epoch": 0.18059081467805216,
      "grad_norm": 0.34581634402275085,
      "learning_rate": 9.968098895776348e-05,
      "loss": 2.232,
      "step": 3130
    },
    {
      "epoch": 0.18116778213708748,
      "grad_norm": 0.36607232689857483,
      "learning_rate": 9.966885066867027e-05,
      "loss": 2.2202,
      "step": 3140
    },
    {
      "epoch": 0.18174474959612277,
      "grad_norm": 0.3712445795536041,
      "learning_rate": 9.965648650868293e-05,
      "loss": 2.203,
      "step": 3150
    },
    {
      "epoch": 0.1823217170551581,
      "grad_norm": 0.3221428096294403,
      "learning_rate": 9.964389653402797e-05,
      "loss": 2.2184,
      "step": 3160
    },
    {
      "epoch": 0.1828986845141934,
      "grad_norm": 0.4847376048564911,
      "learning_rate": 9.963108080195869e-05,
      "loss": 2.2127,
      "step": 3170
    },
    {
      "epoch": 0.1834756519732287,
      "grad_norm": 0.5290243625640869,
      "learning_rate": 9.961803937075516e-05,
      "loss": 2.2163,
      "step": 3180
    },
    {
      "epoch": 0.18405261943226403,
      "grad_norm": 0.3758445084095001,
      "learning_rate": 9.960477229972372e-05,
      "loss": 2.2056,
      "step": 3190
    },
    {
      "epoch": 0.18462958689129932,
      "grad_norm": 0.3582039773464203,
      "learning_rate": 9.959127964919684e-05,
      "loss": 2.214,
      "step": 3200
    },
    {
      "epoch": 0.18520655435033465,
      "grad_norm": 0.4270158112049103,
      "learning_rate": 9.957756148053288e-05,
      "loss": 2.2075,
      "step": 3210
    },
    {
      "epoch": 0.18578352180936994,
      "grad_norm": 0.5898721218109131,
      "learning_rate": 9.956361785611568e-05,
      "loss": 2.2099,
      "step": 3220
    },
    {
      "epoch": 0.18636048926840526,
      "grad_norm": 0.3439638912677765,
      "learning_rate": 9.95494488393544e-05,
      "loss": 2.2077,
      "step": 3230
    },
    {
      "epoch": 0.18693745672744058,
      "grad_norm": 0.37806159257888794,
      "learning_rate": 9.953505449468313e-05,
      "loss": 2.2226,
      "step": 3240
    },
    {
      "epoch": 0.18751442418647588,
      "grad_norm": 0.4258742332458496,
      "learning_rate": 9.952043488756066e-05,
      "loss": 2.2137,
      "step": 3250
    },
    {
      "epoch": 0.1880913916455112,
      "grad_norm": 0.4259757697582245,
      "learning_rate": 9.950559008447024e-05,
      "loss": 2.21,
      "step": 3260
    },
    {
      "epoch": 0.1886683591045465,
      "grad_norm": 0.3395422399044037,
      "learning_rate": 9.94905201529191e-05,
      "loss": 2.211,
      "step": 3270
    },
    {
      "epoch": 0.1892453265635818,
      "grad_norm": 0.407440721988678,
      "learning_rate": 9.94752251614383e-05,
      "loss": 2.215,
      "step": 3280
    },
    {
      "epoch": 0.18982229402261713,
      "grad_norm": 0.3814033269882202,
      "learning_rate": 9.945970517958238e-05,
      "loss": 2.2249,
      "step": 3290
    },
    {
      "epoch": 0.19039926148165243,
      "grad_norm": 0.5478818416595459,
      "learning_rate": 9.944396027792904e-05,
      "loss": 2.2032,
      "step": 3300
    },
    {
      "epoch": 0.19097622894068775,
      "grad_norm": 0.5092093348503113,
      "learning_rate": 9.942799052807874e-05,
      "loss": 2.2157,
      "step": 3310
    },
    {
      "epoch": 0.19155319639972307,
      "grad_norm": 0.4795791506767273,
      "learning_rate": 9.941179600265454e-05,
      "loss": 2.2013,
      "step": 3320
    },
    {
      "epoch": 0.19213016385875836,
      "grad_norm": 0.6791263222694397,
      "learning_rate": 9.93953767753016e-05,
      "loss": 2.1997,
      "step": 3330
    },
    {
      "epoch": 0.19270713131779368,
      "grad_norm": 0.48331305384635925,
      "learning_rate": 9.937873292068696e-05,
      "loss": 2.1939,
      "step": 3340
    },
    {
      "epoch": 0.19328409877682898,
      "grad_norm": 0.41813355684280396,
      "learning_rate": 9.936186451449916e-05,
      "loss": 2.2098,
      "step": 3350
    },
    {
      "epoch": 0.1938610662358643,
      "grad_norm": 0.3425246775150299,
      "learning_rate": 9.934477163344787e-05,
      "loss": 2.2023,
      "step": 3360
    },
    {
      "epoch": 0.19443803369489962,
      "grad_norm": 0.5277093648910522,
      "learning_rate": 9.93274543552636e-05,
      "loss": 2.199,
      "step": 3370
    },
    {
      "epoch": 0.1950150011539349,
      "grad_norm": 0.4232868552207947,
      "learning_rate": 9.930991275869727e-05,
      "loss": 2.1923,
      "step": 3380
    },
    {
      "epoch": 0.19559196861297024,
      "grad_norm": 0.6466622352600098,
      "learning_rate": 9.929214692351998e-05,
      "loss": 2.2013,
      "step": 3390
    },
    {
      "epoch": 0.19616893607200553,
      "grad_norm": 0.48593297600746155,
      "learning_rate": 9.927415693052245e-05,
      "loss": 2.2042,
      "step": 3400
    },
    {
      "epoch": 0.19674590353104085,
      "grad_norm": 0.6403309106826782,
      "learning_rate": 9.925594286151487e-05,
      "loss": 2.188,
      "step": 3410
    },
    {
      "epoch": 0.19732287099007617,
      "grad_norm": 0.4060690402984619,
      "learning_rate": 9.923750479932635e-05,
      "loss": 2.2146,
      "step": 3420
    },
    {
      "epoch": 0.19789983844911146,
      "grad_norm": 0.3018631339073181,
      "learning_rate": 9.921884282780463e-05,
      "loss": 2.2013,
      "step": 3430
    },
    {
      "epoch": 0.1984768059081468,
      "grad_norm": 0.3768801689147949,
      "learning_rate": 9.919995703181573e-05,
      "loss": 2.1963,
      "step": 3440
    },
    {
      "epoch": 0.19905377336718208,
      "grad_norm": 0.5511378645896912,
      "learning_rate": 9.918084749724348e-05,
      "loss": 2.1979,
      "step": 3450
    },
    {
      "epoch": 0.1996307408262174,
      "grad_norm": 0.5576174855232239,
      "learning_rate": 9.916151431098918e-05,
      "loss": 2.1961,
      "step": 3460
    },
    {
      "epoch": 0.20020770828525272,
      "grad_norm": 0.4171042740345001,
      "learning_rate": 9.91419575609712e-05,
      "loss": 2.2061,
      "step": 3470
    },
    {
      "epoch": 0.20078467574428802,
      "grad_norm": 0.3103582561016083,
      "learning_rate": 9.912217733612455e-05,
      "loss": 2.1972,
      "step": 3480
    },
    {
      "epoch": 0.20136164320332334,
      "grad_norm": 0.39360588788986206,
      "learning_rate": 9.910217372640052e-05,
      "loss": 2.2038,
      "step": 3490
    },
    {
      "epoch": 0.20193861066235863,
      "grad_norm": 0.3420889377593994,
      "learning_rate": 9.908194682276626e-05,
      "loss": 2.2043,
      "step": 3500
    },
    {
      "epoch": 0.20251557812139395,
      "grad_norm": 0.30040401220321655,
      "learning_rate": 9.906149671720436e-05,
      "loss": 2.2147,
      "step": 3510
    },
    {
      "epoch": 0.20309254558042927,
      "grad_norm": 0.3862975835800171,
      "learning_rate": 9.904082350271238e-05,
      "loss": 2.216,
      "step": 3520
    },
    {
      "epoch": 0.20366951303946457,
      "grad_norm": 0.5109495520591736,
      "learning_rate": 9.901992727330254e-05,
      "loss": 2.194,
      "step": 3530
    },
    {
      "epoch": 0.2042464804984999,
      "grad_norm": 0.5943346619606018,
      "learning_rate": 9.899880812400118e-05,
      "loss": 2.1829,
      "step": 3540
    },
    {
      "epoch": 0.20482344795753518,
      "grad_norm": 0.38951772451400757,
      "learning_rate": 9.89774661508484e-05,
      "loss": 2.2003,
      "step": 3550
    },
    {
      "epoch": 0.2054004154165705,
      "grad_norm": 0.4462457001209259,
      "learning_rate": 9.895590145089759e-05,
      "loss": 2.2059,
      "step": 3560
    },
    {
      "epoch": 0.20597738287560582,
      "grad_norm": 0.3021964132785797,
      "learning_rate": 9.8934114122215e-05,
      "loss": 2.1936,
      "step": 3570
    },
    {
      "epoch": 0.20655435033464112,
      "grad_norm": 0.3611934781074524,
      "learning_rate": 9.891210426387931e-05,
      "loss": 2.2004,
      "step": 3580
    },
    {
      "epoch": 0.20713131779367644,
      "grad_norm": 0.6618946194648743,
      "learning_rate": 9.888987197598115e-05,
      "loss": 2.1984,
      "step": 3590
    },
    {
      "epoch": 0.20770828525271176,
      "grad_norm": 0.6646985411643982,
      "learning_rate": 9.886741735962261e-05,
      "loss": 2.1952,
      "step": 3600
    },
    {
      "epoch": 0.20828525271174705,
      "grad_norm": 0.7527153491973877,
      "learning_rate": 9.884474051691693e-05,
      "loss": 2.1803,
      "step": 3610
    },
    {
      "epoch": 0.20886222017078238,
      "grad_norm": 0.5738996267318726,
      "learning_rate": 9.882184155098784e-05,
      "loss": 2.2139,
      "step": 3620
    },
    {
      "epoch": 0.20943918762981767,
      "grad_norm": 0.3935083746910095,
      "learning_rate": 9.879872056596922e-05,
      "loss": 2.189,
      "step": 3630
    },
    {
      "epoch": 0.210016155088853,
      "grad_norm": 0.4685090482234955,
      "learning_rate": 9.877537766700458e-05,
      "loss": 2.1919,
      "step": 3640
    },
    {
      "epoch": 0.2105931225478883,
      "grad_norm": 0.4964298903942108,
      "learning_rate": 9.87518129602466e-05,
      "loss": 2.1843,
      "step": 3650
    },
    {
      "epoch": 0.2111700900069236,
      "grad_norm": 0.33545660972595215,
      "learning_rate": 9.872802655285663e-05,
      "loss": 2.1826,
      "step": 3660
    },
{ |
|
"epoch": 0.21174705746595893, |
|
"grad_norm": 0.3587926924228668, |
|
"learning_rate": 9.870401855300423e-05, |
|
"loss": 2.2016, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.21232402492499422, |
|
"grad_norm": 0.6967931985855103, |
|
"learning_rate": 9.867978906986665e-05, |
|
"loss": 2.1923, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.21290099238402954, |
|
"grad_norm": 0.5460529923439026, |
|
"learning_rate": 9.865533821362832e-05, |
|
"loss": 2.1825, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.21347795984306486, |
|
"grad_norm": 0.32380005717277527, |
|
"learning_rate": 9.863066609548042e-05, |
|
"loss": 2.194, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.21405492730210016, |
|
"grad_norm": 0.4450247287750244, |
|
"learning_rate": 9.860577282762027e-05, |
|
"loss": 2.2023, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.21463189476113548, |
|
"grad_norm": 0.3208085298538208, |
|
"learning_rate": 9.858065852325094e-05, |
|
"loss": 2.1782, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.21520886222017077, |
|
"grad_norm": 0.34282881021499634, |
|
"learning_rate": 9.85553232965806e-05, |
|
"loss": 2.186, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.2157858296792061, |
|
"grad_norm": 0.4419040381908417, |
|
"learning_rate": 9.852976726282213e-05, |
|
"loss": 2.1831, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.21636279713824141, |
|
"grad_norm": 0.706082820892334, |
|
"learning_rate": 9.85039905381925e-05, |
|
"loss": 2.1929, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.2169397645972767, |
|
"grad_norm": 0.2975011467933655, |
|
"learning_rate": 9.847799323991234e-05, |
|
"loss": 2.1937, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.21751673205631203, |
|
"grad_norm": 0.36291176080703735, |
|
"learning_rate": 9.845177548620525e-05, |
|
"loss": 2.1949, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.21809369951534732, |
|
"grad_norm": 0.31104329228401184, |
|
"learning_rate": 9.842533739629744e-05, |
|
"loss": 2.1829, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.21867066697438264, |
|
"grad_norm": 0.3598311245441437, |
|
"learning_rate": 9.83986790904171e-05, |
|
"loss": 2.1724, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.21924763443341796, |
|
"grad_norm": 0.717742383480072, |
|
"learning_rate": 9.837180068979379e-05, |
|
"loss": 2.1984, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.21982460189245326, |
|
"grad_norm": 0.5237902402877808, |
|
"learning_rate": 9.834470231665806e-05, |
|
"loss": 2.1791, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.22040156935148858, |
|
"grad_norm": 0.633453905582428, |
|
"learning_rate": 9.83173840942407e-05, |
|
"loss": 2.2034, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.22097853681052387, |
|
"grad_norm": 0.5341862440109253, |
|
"learning_rate": 9.828984614677234e-05, |
|
"loss": 2.1894, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.2215555042695592, |
|
"grad_norm": 0.38566407561302185, |
|
"learning_rate": 9.826208859948279e-05, |
|
"loss": 2.1807, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.22213247172859452, |
|
"grad_norm": 0.4219278395175934, |
|
"learning_rate": 9.823411157860048e-05, |
|
"loss": 2.1827, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.2227094391876298, |
|
"grad_norm": 0.3236340582370758, |
|
"learning_rate": 9.820591521135193e-05, |
|
"loss": 2.1748, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.22328640664666513, |
|
"grad_norm": 0.335493803024292, |
|
"learning_rate": 9.817749962596115e-05, |
|
"loss": 2.1778, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.22386337410570045, |
|
"grad_norm": 0.3819381594657898, |
|
"learning_rate": 9.814886495164905e-05, |
|
"loss": 2.1836, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.22444034156473575, |
|
"grad_norm": 0.370803564786911, |
|
"learning_rate": 9.812001131863281e-05, |
|
"loss": 2.1896, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.22501730902377107, |
|
"grad_norm": 0.406332790851593, |
|
"learning_rate": 9.809093885812543e-05, |
|
"loss": 2.187, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.22559427648280636, |
|
"grad_norm": 0.6314796209335327, |
|
"learning_rate": 9.806164770233493e-05, |
|
"loss": 2.196, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.22617124394184168, |
|
"grad_norm": 0.6008214950561523, |
|
"learning_rate": 9.803213798446389e-05, |
|
"loss": 2.1867, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.226748211400877, |
|
"grad_norm": 0.41306042671203613, |
|
"learning_rate": 9.800240983870886e-05, |
|
"loss": 2.1802, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.2273251788599123, |
|
"grad_norm": 0.3542007803916931, |
|
"learning_rate": 9.797246340025962e-05, |
|
"loss": 2.1872, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.22790214631894762, |
|
"grad_norm": 0.35550656914711, |
|
"learning_rate": 9.79422988052987e-05, |
|
"loss": 2.1811, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.2284791137779829, |
|
"grad_norm": 0.4810178875923157, |
|
"learning_rate": 9.791191619100069e-05, |
|
"loss": 2.1716, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.22905608123701823, |
|
"grad_norm": 0.4199106693267822, |
|
"learning_rate": 9.78813156955316e-05, |
|
"loss": 2.1816, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.22963304869605355, |
|
"grad_norm": 0.5093641877174377, |
|
"learning_rate": 9.785049745804833e-05, |
|
"loss": 2.1931, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.23021001615508885, |
|
"grad_norm": 0.32428303360939026, |
|
"learning_rate": 9.78194616186979e-05, |
|
"loss": 2.1839, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.23078698361412417, |
|
"grad_norm": 0.4841248691082001, |
|
"learning_rate": 9.778820831861693e-05, |
|
"loss": 2.1695, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.23136395107315946, |
|
"grad_norm": 0.4829302430152893, |
|
"learning_rate": 9.775673769993092e-05, |
|
"loss": 2.1786, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.23194091853219478, |
|
"grad_norm": 0.6401785612106323, |
|
"learning_rate": 9.772504990575365e-05, |
|
"loss": 2.1814, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.2325178859912301, |
|
"grad_norm": 0.3681984543800354, |
|
"learning_rate": 9.76931450801865e-05, |
|
"loss": 2.1719, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.2330948534502654, |
|
"grad_norm": 0.35280972719192505, |
|
"learning_rate": 9.766102336831785e-05, |
|
"loss": 2.1695, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.23367182090930072, |
|
"grad_norm": 0.3713974058628082, |
|
"learning_rate": 9.76286849162223e-05, |
|
"loss": 2.1657, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.23424878836833601, |
|
"grad_norm": 0.7190024852752686, |
|
"learning_rate": 9.759612987096016e-05, |
|
"loss": 2.182, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.23482575582737134, |
|
"grad_norm": 0.6214960217475891, |
|
"learning_rate": 9.756335838057665e-05, |
|
"loss": 2.1656, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.23540272328640666, |
|
"grad_norm": 0.28883612155914307, |
|
"learning_rate": 9.753037059410134e-05, |
|
"loss": 2.1723, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.23597969074544195, |
|
"grad_norm": 0.4463951289653778, |
|
"learning_rate": 9.749716666154737e-05, |
|
"loss": 2.1799, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.23655665820447727, |
|
"grad_norm": 0.6259954571723938, |
|
"learning_rate": 9.746374673391082e-05, |
|
"loss": 2.173, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.23713362566351256, |
|
"grad_norm": 0.35418662428855896, |
|
"learning_rate": 9.743011096317e-05, |
|
"loss": 2.1809, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.2377105931225479, |
|
"grad_norm": 0.5010941028594971, |
|
"learning_rate": 9.739625950228484e-05, |
|
"loss": 2.179, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.2382875605815832, |
|
"grad_norm": 0.37506598234176636, |
|
"learning_rate": 9.736219250519604e-05, |
|
"loss": 2.1754, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.2388645280406185, |
|
"grad_norm": 0.5773160457611084, |
|
"learning_rate": 9.732791012682452e-05, |
|
"loss": 2.1694, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.23944149549965382, |
|
"grad_norm": 0.3286305069923401, |
|
"learning_rate": 9.729341252307063e-05, |
|
"loss": 2.1704, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.24001846295868912, |
|
"grad_norm": 0.40018871426582336, |
|
"learning_rate": 9.725869985081346e-05, |
|
"loss": 2.1802, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.24059543041772444, |
|
"grad_norm": 0.46409788727760315, |
|
"learning_rate": 9.722377226791017e-05, |
|
"loss": 2.1773, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.24117239787675976, |
|
"grad_norm": 0.4142248332500458, |
|
"learning_rate": 9.718862993319518e-05, |
|
"loss": 2.1747, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.24174936533579505, |
|
"grad_norm": 0.5945191979408264, |
|
"learning_rate": 9.715327300647953e-05, |
|
"loss": 2.1689, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.24232633279483037, |
|
"grad_norm": 0.44592514634132385, |
|
"learning_rate": 9.711770164855015e-05, |
|
"loss": 2.1694, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.2429033002538657, |
|
"grad_norm": 0.2986921966075897, |
|
"learning_rate": 9.708191602116905e-05, |
|
"loss": 2.1637, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.243480267712901, |
|
"grad_norm": 0.4355255663394928, |
|
"learning_rate": 9.704591628707267e-05, |
|
"loss": 2.152, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.2440572351719363, |
|
"grad_norm": 0.46000930666923523, |
|
"learning_rate": 9.70097026099711e-05, |
|
"loss": 2.1636, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.2446342026309716, |
|
"grad_norm": 0.3301403820514679, |
|
"learning_rate": 9.697327515454735e-05, |
|
"loss": 2.1596, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.24521117009000692, |
|
"grad_norm": 0.4323584735393524, |
|
"learning_rate": 9.693663408645658e-05, |
|
"loss": 2.1723, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.24578813754904225, |
|
"grad_norm": 0.28445109724998474, |
|
"learning_rate": 9.689977957232539e-05, |
|
"loss": 2.1728, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.24636510500807754, |
|
"grad_norm": 0.3563917279243469, |
|
"learning_rate": 9.686271177975099e-05, |
|
"loss": 2.162, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.24694207246711286, |
|
"grad_norm": 0.29485827684402466, |
|
"learning_rate": 9.682543087730054e-05, |
|
"loss": 2.1781, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.24751903992614815, |
|
"grad_norm": 0.4161454141139984, |
|
"learning_rate": 9.678793703451028e-05, |
|
"loss": 2.17, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.24809600738518348, |
|
"grad_norm": 0.3382590413093567, |
|
"learning_rate": 9.67502304218848e-05, |
|
"loss": 2.1748, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.2486729748442188, |
|
"grad_norm": 0.3781547248363495, |
|
"learning_rate": 9.671231121089629e-05, |
|
"loss": 2.152, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.2492499423032541, |
|
"grad_norm": 0.2784830331802368, |
|
"learning_rate": 9.667417957398375e-05, |
|
"loss": 2.1656, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.2498269097622894, |
|
"grad_norm": 0.3041353225708008, |
|
"learning_rate": 9.663583568455214e-05, |
|
"loss": 2.1665, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.25040387722132473, |
|
"grad_norm": 0.30996307730674744, |
|
"learning_rate": 9.659727971697174e-05, |
|
"loss": 2.1701, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.25098084468036, |
|
"grad_norm": 0.42379140853881836, |
|
"learning_rate": 9.655851184657716e-05, |
|
"loss": 2.158, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.2515578121393953, |
|
"grad_norm": 0.5364662408828735, |
|
"learning_rate": 9.65195322496667e-05, |
|
"loss": 2.1658, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.25213477959843067, |
|
"grad_norm": 0.447578102350235, |
|
"learning_rate": 9.648034110350151e-05, |
|
"loss": 2.1626, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.25271174705746596, |
|
"grad_norm": 0.4996080696582794, |
|
"learning_rate": 9.644093858630471e-05, |
|
"loss": 2.1659, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.25328871451650126, |
|
"grad_norm": 0.2662523090839386, |
|
"learning_rate": 9.640132487726069e-05, |
|
"loss": 2.1824, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.2538656819755366, |
|
"grad_norm": 0.2966640293598175, |
|
"learning_rate": 9.636150015651422e-05, |
|
"loss": 2.1665, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.2544426494345719, |
|
"grad_norm": 0.3168790638446808, |
|
"learning_rate": 9.632146460516967e-05, |
|
"loss": 2.1814, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.2550196168936072, |
|
"grad_norm": 0.7422902584075928, |
|
"learning_rate": 9.628121840529015e-05, |
|
"loss": 2.1674, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.2555965843526425, |
|
"grad_norm": 0.447091668844223, |
|
"learning_rate": 9.624076173989672e-05, |
|
"loss": 2.1606, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.25617355181167784, |
|
"grad_norm": 0.37554842233657837, |
|
"learning_rate": 9.620009479296748e-05, |
|
"loss": 2.158, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.25675051927071313, |
|
"grad_norm": 0.353153795003891, |
|
"learning_rate": 9.61592177494369e-05, |
|
"loss": 2.1627, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.2573274867297484, |
|
"grad_norm": 0.4235435724258423, |
|
"learning_rate": 9.611813079519479e-05, |
|
"loss": 2.1617, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.25790445418878377, |
|
"grad_norm": 0.34072136878967285, |
|
"learning_rate": 9.607683411708555e-05, |
|
"loss": 2.1651, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.25848142164781907, |
|
"grad_norm": 0.33193790912628174, |
|
"learning_rate": 9.603532790290733e-05, |
|
"loss": 2.1592, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.25905838910685436, |
|
"grad_norm": 0.3969743549823761, |
|
"learning_rate": 9.599361234141113e-05, |
|
"loss": 2.1709, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.2596353565658897, |
|
"grad_norm": 0.5880393981933594, |
|
"learning_rate": 9.595168762229998e-05, |
|
"loss": 2.1589, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.260212324024925, |
|
"grad_norm": 0.4169381856918335, |
|
"learning_rate": 9.590955393622804e-05, |
|
"loss": 2.166, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.2607892914839603, |
|
"grad_norm": 0.5424039959907532, |
|
"learning_rate": 9.586721147479983e-05, |
|
"loss": 2.1592, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.2613662589429956, |
|
"grad_norm": 0.6034404039382935, |
|
"learning_rate": 9.582466043056914e-05, |
|
"loss": 2.1546, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.26194322640203094, |
|
"grad_norm": 0.34911131858825684, |
|
"learning_rate": 9.578190099703845e-05, |
|
"loss": 2.1667, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.26252019386106623, |
|
"grad_norm": 0.40795624256134033, |
|
"learning_rate": 9.573893336865778e-05, |
|
"loss": 2.1544, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.2630971613201015, |
|
"grad_norm": 0.36005672812461853, |
|
"learning_rate": 9.569575774082401e-05, |
|
"loss": 2.1547, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.2636741287791369, |
|
"grad_norm": 0.3152981102466583, |
|
"learning_rate": 9.565237430987986e-05, |
|
"loss": 2.1557, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.26425109623817217, |
|
"grad_norm": 0.33613887429237366, |
|
"learning_rate": 9.560878327311302e-05, |
|
"loss": 2.1617, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.26482806369720746, |
|
"grad_norm": 0.4851570427417755, |
|
"learning_rate": 9.556498482875535e-05, |
|
"loss": 2.1462, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.2654050311562428, |
|
"grad_norm": 0.2919342517852783, |
|
"learning_rate": 9.552097917598184e-05, |
|
"loss": 2.1683, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.2659819986152781, |
|
"grad_norm": 0.31903278827667236, |
|
"learning_rate": 9.547676651490978e-05, |
|
"loss": 2.1608, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.2665589660743134, |
|
"grad_norm": 1.5958030223846436, |
|
"learning_rate": 9.543234704659784e-05, |
|
"loss": 2.1548, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.26713593353334875, |
|
"grad_norm": 0.26471462845802307, |
|
"learning_rate": 9.538772097304521e-05, |
|
"loss": 2.1566, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.26771290099238404, |
|
"grad_norm": 0.45223453640937805, |
|
"learning_rate": 9.534288849719049e-05, |
|
"loss": 2.1653, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.26828986845141933, |
|
"grad_norm": 0.33436188101768494, |
|
"learning_rate": 9.529784982291105e-05, |
|
"loss": 2.1547, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.2688668359104546, |
|
"grad_norm": 0.34297022223472595, |
|
"learning_rate": 9.525260515502185e-05, |
|
"loss": 2.1604, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.26944380336949, |
|
"grad_norm": 0.572670578956604, |
|
"learning_rate": 9.520715469927468e-05, |
|
"loss": 2.155, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.27002077082852527, |
|
"grad_norm": 0.28033772110939026, |
|
"learning_rate": 9.516149866235713e-05, |
|
"loss": 2.1731, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.27059773828756056, |
|
"grad_norm": 0.460891455411911, |
|
"learning_rate": 9.511563725189165e-05, |
|
"loss": 2.153, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.2711747057465959, |
|
"grad_norm": 0.34910324215888977, |
|
"learning_rate": 9.506957067643469e-05, |
|
"loss": 2.1617, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.2717516732056312, |
|
"grad_norm": 0.4306391775608063, |
|
"learning_rate": 9.502329914547567e-05, |
|
"loss": 2.1362, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.2723286406646665, |
|
"grad_norm": 0.39422643184661865, |
|
"learning_rate": 9.497682286943604e-05, |
|
"loss": 2.1513, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.27290560812370185, |
|
"grad_norm": 0.3839055001735687, |
|
"learning_rate": 9.493014205966832e-05, |
|
"loss": 2.1553, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.27348257558273714, |
|
"grad_norm": 0.31915026903152466, |
|
"learning_rate": 9.488325692845522e-05, |
|
"loss": 2.155, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.27405954304177244, |
|
"grad_norm": 0.29322880506515503, |
|
"learning_rate": 9.483616768900854e-05, |
|
"loss": 2.1543, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.27463651050080773, |
|
"grad_norm": 0.4080626666545868, |
|
"learning_rate": 9.478887455546832e-05, |
|
"loss": 2.1478, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.2752134779598431, |
|
"grad_norm": 0.42490825057029724, |
|
"learning_rate": 9.47413777429018e-05, |
|
"loss": 2.1449, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.27579044541887837, |
|
"grad_norm": 0.29601389169692993, |
|
"learning_rate": 9.469367746730243e-05, |
|
"loss": 2.1489, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.27636741287791367, |
|
"grad_norm": 0.28761008381843567, |
|
"learning_rate": 9.464577394558893e-05, |
|
"loss": 2.1542, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.276944380336949, |
|
"grad_norm": 0.35729655623435974, |
|
"learning_rate": 9.459766739560433e-05, |
|
"loss": 2.146, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.2775213477959843, |
|
"grad_norm": 0.5332178473472595, |
|
"learning_rate": 9.45493580361149e-05, |
|
"loss": 2.1498, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.2780983152550196, |
|
"grad_norm": 0.3445925712585449, |
|
"learning_rate": 9.450084608680915e-05, |
|
"loss": 2.1369, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.27867528271405495, |
|
"grad_norm": 0.37524721026420593, |
|
"learning_rate": 9.4452131768297e-05, |
|
"loss": 2.1465, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.27925225017309024, |
|
"grad_norm": 0.35284727811813354, |
|
"learning_rate": 9.440321530210852e-05, |
|
"loss": 2.145, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.27982921763212554, |
|
"grad_norm": 0.387416273355484, |
|
"learning_rate": 9.435409691069312e-05, |
|
"loss": 2.147, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.28040618509116083, |
|
"grad_norm": 0.33263909816741943, |
|
"learning_rate": 9.430477681741847e-05, |
|
"loss": 2.1449, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.2809831525501962, |
|
"grad_norm": 0.31773439049720764, |
|
"learning_rate": 9.425525524656948e-05, |
|
"loss": 2.1528, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.2815601200092315, |
|
"grad_norm": 0.3170188367366791, |
|
"learning_rate": 9.420553242334727e-05, |
|
"loss": 2.1564, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.28213708746826677, |
|
"grad_norm": 0.4686289429664612, |
|
"learning_rate": 9.415560857386817e-05, |
|
"loss": 2.1557, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.2827140549273021, |
|
"grad_norm": 0.2960648834705353, |
|
"learning_rate": 9.410548392516271e-05, |
|
"loss": 2.1348, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.2832910223863374, |
|
"grad_norm": 0.6153275370597839, |
|
"learning_rate": 9.405515870517455e-05, |
|
"loss": 2.1498, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.2838679898453727, |
|
"grad_norm": 0.549048900604248, |
|
"learning_rate": 9.400463314275943e-05, |
|
"loss": 2.1461, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.28444495730440805, |
|
"grad_norm": 0.2764069736003876, |
|
"learning_rate": 9.395390746768415e-05, |
|
"loss": 2.1421, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.28502192476344335, |
|
"grad_norm": 0.5139395594596863, |
|
"learning_rate": 9.39029819106256e-05, |
|
"loss": 2.1489, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.28559889222247864, |
|
"grad_norm": 0.30540773272514343, |
|
"learning_rate": 9.385185670316957e-05, |
|
"loss": 2.1571, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.286175859681514, |
|
"grad_norm": 0.3731764853000641, |
|
"learning_rate": 9.380053207780977e-05, |
|
"loss": 2.146, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.2867528271405493, |
|
"grad_norm": 0.3358662724494934, |
|
"learning_rate": 9.37490082679468e-05, |
|
"loss": 2.1426, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.2873297945995846, |
|
"grad_norm": 0.37629401683807373, |
|
"learning_rate": 9.369728550788706e-05, |
|
"loss": 2.1478, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.28790676205861987, |
|
"grad_norm": 0.2608135938644409, |
|
"learning_rate": 9.364536403284167e-05, |
|
"loss": 2.147, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.2884837295176552, |
|
"grad_norm": 0.3051717281341553, |
|
"learning_rate": 9.359324407892542e-05, |
|
"loss": 2.1509, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.2890606969766905, |
|
"grad_norm": 0.2828206717967987, |
|
"learning_rate": 9.354092588315566e-05, |
|
"loss": 2.1467, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.2896376644357258, |
|
"grad_norm": 0.3248600959777832, |
|
"learning_rate": 9.348840968345129e-05, |
|
"loss": 2.1441, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.29021463189476115, |
|
"grad_norm": 0.322581022977829, |
|
"learning_rate": 9.343569571863165e-05, |
|
"loss": 2.1526, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.29079159935379645, |
|
"grad_norm": 0.5681893825531006, |
|
"learning_rate": 9.338278422841536e-05, |
|
"loss": 2.1577, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.29136856681283174, |
|
"grad_norm": 0.27868878841400146, |
|
"learning_rate": 9.332967545341935e-05, |
|
"loss": 2.1542, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.2919455342718671, |
|
"grad_norm": 0.26688241958618164, |
|
"learning_rate": 9.327636963515771e-05, |
|
"loss": 2.1497, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.2925225017309024, |
|
"grad_norm": 0.4590870440006256, |
|
"learning_rate": 9.322286701604056e-05, |
|
"loss": 2.1481, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.2930994691899377, |
|
"grad_norm": 0.44435915350914, |
|
"learning_rate": 9.316916783937299e-05, |
|
"loss": 2.1551, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.29367643664897297, |
|
"grad_norm": 0.5026682019233704, |
|
"learning_rate": 9.311527234935397e-05, |
|
"loss": 2.1594, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.2942534041080083, |
|
"grad_norm": 0.28176602721214294, |
|
"learning_rate": 9.306118079107518e-05, |
|
"loss": 2.1427, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.2948303715670436, |
|
"grad_norm": 0.3687533140182495, |
|
"learning_rate": 9.300689341051992e-05, |
|
"loss": 2.136, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.2954073390260789, |
|
"grad_norm": 0.35213086009025574, |
|
"learning_rate": 9.295241045456204e-05, |
|
"loss": 2.1411, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.29598430648511426, |
|
"grad_norm": 0.26359936594963074, |
|
"learning_rate": 9.289773217096473e-05, |
|
"loss": 2.1484, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.29656127394414955, |
|
"grad_norm": 0.37725865840911865, |
|
"learning_rate": 9.284285880837946e-05, |
|
"loss": 2.1444, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.29713824140318484, |
|
"grad_norm": 0.5063810348510742, |
|
"learning_rate": 9.278779061634486e-05, |
|
"loss": 2.1468, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.2977152088622202, |
|
"grad_norm": 0.40431004762649536, |
|
"learning_rate": 9.27325278452855e-05, |
|
"loss": 2.1479, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.2982921763212555, |
|
"grad_norm": 0.47989556193351746, |
|
"learning_rate": 9.267707074651084e-05, |
|
"loss": 2.1465, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.2988691437802908, |
|
"grad_norm": 0.5401825308799744, |
|
"learning_rate": 9.262141957221403e-05, |
|
"loss": 2.1406, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.29944611123932613, |
|
"grad_norm": 0.4957210123538971, |
|
"learning_rate": 9.256557457547081e-05, |
|
"loss": 2.1657, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.3000230786983614, |
|
"grad_norm": 0.46101513504981995, |
|
"learning_rate": 9.25095360102383e-05, |
|
"loss": 2.1387, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.3006000461573967, |
|
"grad_norm": 0.5260946154594421, |
|
"learning_rate": 9.245330413135395e-05, |
|
"loss": 2.1417, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.301177013616432, |
|
"grad_norm": 0.36784660816192627, |
|
"learning_rate": 9.239687919453421e-05, |
|
"loss": 2.1502, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.30175398107546736, |
|
"grad_norm": 0.3011978566646576, |
|
"learning_rate": 9.234026145637353e-05, |
|
"loss": 2.1464, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.30233094853450265, |
|
"grad_norm": 0.31565114855766296, |
|
"learning_rate": 9.228345117434314e-05, |
|
"loss": 2.1237, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.30290791599353795, |
|
"grad_norm": 0.2920317053794861, |
|
"learning_rate": 9.222644860678984e-05, |
|
"loss": 2.1287, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.3034848834525733, |
|
"grad_norm": 0.31231454014778137, |
|
"learning_rate": 9.216925401293488e-05, |
|
"loss": 2.1552, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.3040618509116086, |
|
"grad_norm": 0.361036479473114, |
|
"learning_rate": 9.211186765287271e-05, |
|
"loss": 2.1483, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.3046388183706439, |
|
"grad_norm": 0.29103997349739075, |
|
"learning_rate": 9.205428978756991e-05, |
|
"loss": 2.1469, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.30521578582967923, |
|
"grad_norm": 0.38782352209091187, |
|
"learning_rate": 9.199652067886391e-05, |
|
"loss": 2.1371, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.3057927532887145, |
|
"grad_norm": 0.3866995871067047, |
|
"learning_rate": 9.193856058946179e-05, |
|
"loss": 2.1287, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.3063697207477498, |
|
"grad_norm": 0.3839791417121887, |
|
"learning_rate": 9.188040978293916e-05, |
|
"loss": 2.1424, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.3069466882067851, |
|
"grad_norm": 0.6226606369018555, |
|
"learning_rate": 9.182206852373892e-05, |
|
"loss": 2.1447, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.30752365566582046, |
|
"grad_norm": 0.6132786870002747, |
|
"learning_rate": 9.176353707717007e-05, |
|
"loss": 2.1326, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.30810062312485575, |
|
"grad_norm": 0.4488280117511749, |
|
"learning_rate": 9.170481570940645e-05, |
|
"loss": 2.1291, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.30867759058389105, |
|
"grad_norm": 0.4035676419734955, |
|
"learning_rate": 9.164590468748563e-05, |
|
"loss": 2.1426, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.3092545580429264, |
|
"grad_norm": 0.403726190328598, |
|
"learning_rate": 9.15868042793076e-05, |
|
"loss": 2.1416, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.3098315255019617, |
|
"grad_norm": 0.3956368565559387, |
|
"learning_rate": 9.152751475363359e-05, |
|
"loss": 2.1317, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 0.310408492960997, |
|
"grad_norm": 0.3403710722923279, |
|
"learning_rate": 9.14680363800849e-05, |
|
"loss": 2.1368, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.31098546042003233, |
|
"grad_norm": 0.28579944372177124, |
|
"learning_rate": 9.140836942914154e-05, |
|
"loss": 2.1484, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 0.3115624278790676, |
|
"grad_norm": 0.3212498724460602, |
|
"learning_rate": 9.134851417214114e-05, |
|
"loss": 2.1337, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.3121393953381029, |
|
"grad_norm": 0.2878890335559845, |
|
"learning_rate": 9.128847088127768e-05, |
|
"loss": 2.1359, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 0.3127163627971382, |
|
"grad_norm": 0.47753390669822693, |
|
"learning_rate": 9.122823982960014e-05, |
|
"loss": 2.1397, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.31329333025617356, |
|
"grad_norm": 0.4015023410320282, |
|
"learning_rate": 9.116782129101144e-05, |
|
"loss": 2.1283, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 0.31387029771520886, |
|
"grad_norm": 0.4427224099636078, |
|
"learning_rate": 9.110721554026705e-05, |
|
"loss": 2.1355, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.31444726517424415, |
|
"grad_norm": 0.2725686728954315, |
|
"learning_rate": 9.104642285297385e-05, |
|
"loss": 2.1425, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.3150242326332795, |
|
"grad_norm": 0.31836792826652527, |
|
"learning_rate": 9.098544350558872e-05, |
|
"loss": 2.128, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.3156012000923148, |
|
"grad_norm": 0.2776530981063843, |
|
"learning_rate": 9.092427777541751e-05, |
|
"loss": 2.1353, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 0.3161781675513501, |
|
"grad_norm": 0.28507447242736816, |
|
"learning_rate": 9.086292594061356e-05, |
|
"loss": 2.1378, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.31675513501038544, |
|
"grad_norm": 0.30143943428993225, |
|
"learning_rate": 9.080138828017655e-05, |
|
"loss": 2.1194, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 0.31733210246942073, |
|
"grad_norm": 0.36624717712402344, |
|
"learning_rate": 9.073966507395122e-05, |
|
"loss": 2.1274, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.317909069928456, |
|
"grad_norm": 0.3170357346534729, |
|
"learning_rate": 9.067775660262609e-05, |
|
"loss": 2.1253, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 0.31848603738749137, |
|
"grad_norm": 0.3736599087715149, |
|
"learning_rate": 9.061566314773217e-05, |
|
"loss": 2.1236, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.31906300484652667, |
|
"grad_norm": 0.2892272472381592, |
|
"learning_rate": 9.055338499164171e-05, |
|
"loss": 2.1235, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 0.31963997230556196, |
|
"grad_norm": 0.5071966052055359, |
|
"learning_rate": 9.049092241756683e-05, |
|
"loss": 2.1211, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.32021693976459725, |
|
"grad_norm": 0.5542040467262268, |
|
"learning_rate": 9.042827570955838e-05, |
|
"loss": 2.1413, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.3207939072236326, |
|
"grad_norm": 0.39468914270401, |
|
"learning_rate": 9.036544515250452e-05, |
|
"loss": 2.1484, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.3213708746826679, |
|
"grad_norm": 0.45103156566619873, |
|
"learning_rate": 9.030243103212948e-05, |
|
"loss": 2.1349, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 0.3219478421417032, |
|
"grad_norm": 0.41866421699523926, |
|
"learning_rate": 9.023923363499225e-05, |
|
"loss": 2.1318, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.32252480960073854, |
|
"grad_norm": 0.3836067020893097, |
|
"learning_rate": 9.017585324848528e-05, |
|
"loss": 2.1367, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 0.32310177705977383, |
|
"grad_norm": 0.3563435971736908, |
|
"learning_rate": 9.011229016083315e-05, |
|
"loss": 2.1396, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.3236787445188091, |
|
"grad_norm": 0.38590380549430847, |
|
"learning_rate": 9.004854466109134e-05, |
|
"loss": 2.1345, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 0.3242557119778445, |
|
"grad_norm": 0.3448795676231384, |
|
"learning_rate": 8.998461703914477e-05, |
|
"loss": 2.1233, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.32483267943687977, |
|
"grad_norm": 0.34533822536468506, |
|
"learning_rate": 8.992050758570665e-05, |
|
"loss": 2.1474, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 0.32540964689591506, |
|
"grad_norm": 0.3775763511657715, |
|
"learning_rate": 8.9856216592317e-05, |
|
"loss": 2.1239, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.32598661435495035, |
|
"grad_norm": 0.5391921401023865, |
|
"learning_rate": 8.979174435134146e-05, |
|
"loss": 2.1213, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.3265635818139857, |
|
"grad_norm": 0.4273969233036041, |
|
"learning_rate": 8.972709115596989e-05, |
|
"loss": 2.1356, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.327140549273021, |
|
"grad_norm": 0.4936189353466034, |
|
"learning_rate": 8.9662257300215e-05, |
|
"loss": 2.1347, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 0.3277175167320563, |
|
"grad_norm": 0.39583414793014526, |
|
"learning_rate": 8.959724307891113e-05, |
|
"loss": 2.1279, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.32829448419109164, |
|
"grad_norm": 0.27078336477279663, |
|
"learning_rate": 8.953204878771276e-05, |
|
"loss": 2.1386, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 0.32887145165012693, |
|
"grad_norm": 0.2853717803955078, |
|
"learning_rate": 8.946667472309332e-05, |
|
"loss": 2.1206, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.3294484191091622, |
|
"grad_norm": 0.2861252725124359, |
|
"learning_rate": 8.940112118234372e-05, |
|
"loss": 2.1211, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 0.3300253865681976, |
|
"grad_norm": 0.5010199546813965, |
|
"learning_rate": 8.933538846357105e-05, |
|
"loss": 2.1313, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.33060235402723287, |
|
"grad_norm": 0.3199447989463806, |
|
"learning_rate": 8.926947686569724e-05, |
|
"loss": 2.1281, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 0.33117932148626816, |
|
"grad_norm": 0.3005349040031433, |
|
"learning_rate": 8.920338668845764e-05, |
|
"loss": 2.1341, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.3317562889453035, |
|
"grad_norm": 0.2689361274242401, |
|
"learning_rate": 8.913711823239977e-05, |
|
"loss": 2.1323, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.3323332564043388, |
|
"grad_norm": 0.538209855556488, |
|
"learning_rate": 8.907067179888177e-05, |
|
"loss": 2.1425, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.3329102238633741, |
|
"grad_norm": 0.45525121688842773, |
|
"learning_rate": 8.900404769007123e-05, |
|
"loss": 2.1286, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 0.3334871913224094, |
|
"grad_norm": 0.379524290561676, |
|
"learning_rate": 8.893724620894369e-05, |
|
"loss": 2.1284, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.33406415878144474, |
|
"grad_norm": 0.31159836053848267, |
|
"learning_rate": 8.887026765928129e-05, |
|
"loss": 2.1309, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 0.33464112624048004, |
|
"grad_norm": 0.31788134574890137, |
|
"learning_rate": 8.880311234567142e-05, |
|
"loss": 2.1339, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.33521809369951533, |
|
"grad_norm": 0.2826105058193207, |
|
"learning_rate": 8.873578057350531e-05, |
|
"loss": 2.1195, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 0.3357950611585507, |
|
"grad_norm": 0.3322320282459259, |
|
"learning_rate": 8.86682726489766e-05, |
|
"loss": 2.1323, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.33637202861758597, |
|
"grad_norm": 0.35983988642692566, |
|
"learning_rate": 8.860058887908e-05, |
|
"loss": 2.1319, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 0.33694899607662127, |
|
"grad_norm": 0.27813997864723206, |
|
"learning_rate": 8.853272957160997e-05, |
|
"loss": 2.1251, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.3375259635356566, |
|
"grad_norm": 0.3000926971435547, |
|
"learning_rate": 8.846469503515909e-05, |
|
"loss": 2.1238, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.3381029309946919, |
|
"grad_norm": 0.2732325494289398, |
|
"learning_rate": 8.839648557911691e-05, |
|
"loss": 2.1302, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.3386798984537272, |
|
"grad_norm": 0.3154725134372711, |
|
"learning_rate": 8.832810151366841e-05, |
|
"loss": 2.1185, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 0.3392568659127625, |
|
"grad_norm": 0.3048752546310425, |
|
"learning_rate": 8.825954314979258e-05, |
|
"loss": 2.1053, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.33983383337179784, |
|
"grad_norm": 0.6547893285751343, |
|
"learning_rate": 8.819081079926106e-05, |
|
"loss": 2.1069, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 0.34041080083083314, |
|
"grad_norm": 0.27823716402053833, |
|
"learning_rate": 8.812190477463669e-05, |
|
"loss": 2.1345, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.34098776828986843, |
|
"grad_norm": 0.2662949860095978, |
|
"learning_rate": 8.805282538927214e-05, |
|
"loss": 2.1311, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 0.3415647357489038, |
|
"grad_norm": 0.3390512466430664, |
|
"learning_rate": 8.798357295730836e-05, |
|
"loss": 2.108, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.3421417032079391, |
|
"grad_norm": 0.4059408903121948, |
|
"learning_rate": 8.791414779367335e-05, |
|
"loss": 2.1185, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 0.34271867066697437, |
|
"grad_norm": 0.5488471388816833, |
|
"learning_rate": 8.784455021408051e-05, |
|
"loss": 2.1193, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.3432956381260097, |
|
"grad_norm": 0.29119235277175903, |
|
"learning_rate": 8.777478053502736e-05, |
|
"loss": 2.1249, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.343872605585045, |
|
"grad_norm": 0.268136203289032, |
|
"learning_rate": 8.770483907379403e-05, |
|
"loss": 2.1307, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.3444495730440803, |
|
"grad_norm": 0.2661965787410736, |
|
"learning_rate": 8.763472614844185e-05, |
|
"loss": 2.1146, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 0.3450265405031156, |
|
"grad_norm": 0.4353127181529999, |
|
"learning_rate": 8.756444207781192e-05, |
|
"loss": 2.1215, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.34560350796215095, |
|
"grad_norm": 0.5630121827125549, |
|
"learning_rate": 8.749398718152352e-05, |
|
"loss": 2.1231, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 0.34618047542118624, |
|
"grad_norm": 0.369564026594162, |
|
"learning_rate": 8.742336177997289e-05, |
|
"loss": 2.1185, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.34675744288022153, |
|
"grad_norm": 0.3091670572757721, |
|
"learning_rate": 8.735256619433157e-05, |
|
"loss": 2.1088, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 0.3473344103392569, |
|
"grad_norm": 0.27325648069381714, |
|
"learning_rate": 8.728160074654507e-05, |
|
"loss": 2.1254, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.3479113777982922, |
|
"grad_norm": 0.266166090965271, |
|
"learning_rate": 8.721046575933132e-05, |
|
"loss": 2.112, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 0.34848834525732747, |
|
"grad_norm": 0.3261997699737549, |
|
"learning_rate": 8.713916155617922e-05, |
|
"loss": 2.1253, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.3490653127163628, |
|
"grad_norm": 0.34902575612068176, |
|
"learning_rate": 8.706768846134727e-05, |
|
"loss": 2.1253, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.3496422801753981, |
|
"grad_norm": 0.2903362810611725, |
|
"learning_rate": 8.699604679986188e-05, |
|
"loss": 2.1239, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.3502192476344334, |
|
"grad_norm": 0.2810317575931549, |
|
"learning_rate": 8.692423689751617e-05, |
|
"loss": 2.1183, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 0.35079621509346876, |
|
"grad_norm": 0.3162137269973755, |
|
"learning_rate": 8.68522590808682e-05, |
|
"loss": 2.1171, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.35137318255250405, |
|
"grad_norm": 0.3248831331729889, |
|
"learning_rate": 8.678011367723973e-05, |
|
"loss": 2.1262, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 0.35195015001153934, |
|
"grad_norm": 0.35751304030418396, |
|
"learning_rate": 8.670780101471458e-05, |
|
"loss": 2.1107, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.35252711747057464, |
|
"grad_norm": 0.30080780386924744, |
|
"learning_rate": 8.663532142213721e-05, |
|
"loss": 2.1197, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 0.35310408492961, |
|
"grad_norm": 0.24629926681518555, |
|
"learning_rate": 8.656267522911119e-05, |
|
"loss": 2.137, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.3536810523886453, |
|
"grad_norm": 0.24670863151550293, |
|
"learning_rate": 8.648986276599769e-05, |
|
"loss": 2.1134, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 0.35425801984768057, |
|
"grad_norm": 0.5493544340133667, |
|
"learning_rate": 8.641688436391402e-05, |
|
"loss": 2.1163, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.3548349873067159, |
|
"grad_norm": 0.34867388010025024, |
|
"learning_rate": 8.634374035473212e-05, |
|
"loss": 2.1076, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.3554119547657512, |
|
"grad_norm": 0.38117578625679016, |
|
"learning_rate": 8.6270431071077e-05, |
|
"loss": 2.1163, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.3559889222247865, |
|
"grad_norm": 0.3320949673652649, |
|
"learning_rate": 8.619695684632529e-05, |
|
"loss": 2.1048, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 0.35656588968382186, |
|
"grad_norm": 0.36235710978507996, |
|
"learning_rate": 8.612331801460367e-05, |
|
"loss": 2.1019, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.35714285714285715, |
|
"grad_norm": 0.28610482811927795, |
|
"learning_rate": 8.604951491078739e-05, |
|
"loss": 2.1179, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 0.35771982460189244, |
|
"grad_norm": 0.4073322117328644, |
|
"learning_rate": 8.597554787049875e-05, |
|
"loss": 2.1277, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.35829679206092774, |
|
"grad_norm": 0.2900354862213135, |
|
"learning_rate": 8.590141723010552e-05, |
|
"loss": 2.1257, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 0.3588737595199631, |
|
"grad_norm": 0.3767155408859253, |
|
"learning_rate": 8.582712332671947e-05, |
|
"loss": 2.1198, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.3594507269789984, |
|
"grad_norm": 0.2501465976238251, |
|
"learning_rate": 8.575266649819484e-05, |
|
"loss": 2.1187, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 0.3600276944380337, |
|
"grad_norm": 0.30487820506095886, |
|
"learning_rate": 8.56780470831267e-05, |
|
"loss": 2.1127, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.360604661897069, |
|
"grad_norm": 0.33418163657188416, |
|
"learning_rate": 8.560326542084959e-05, |
|
"loss": 2.1206, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.3611816293561043, |
|
"grad_norm": 0.39887627959251404, |
|
"learning_rate": 8.552832185143577e-05, |
|
"loss": 2.109, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 0.3617585968151396, |
|
"grad_norm": 0.3675427734851837, |
|
"learning_rate": 8.545321671569389e-05, |
|
"loss": 2.1245, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 0.36233556427417496, |
|
"grad_norm": 0.2894626259803772, |
|
"learning_rate": 8.537795035516722e-05, |
|
"loss": 2.1208, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 0.36291253173321025, |
|
"grad_norm": 0.3343421518802643, |
|
"learning_rate": 8.53025231121323e-05, |
|
"loss": 2.1077, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 0.36348949919224555, |
|
"grad_norm": 0.4454777240753174, |
|
"learning_rate": 8.522693532959721e-05, |
|
"loss": 2.1155, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.36406646665128084, |
|
"grad_norm": 0.37064430117607117, |
|
"learning_rate": 8.515118735130011e-05, |
|
"loss": 2.0999, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 0.3646434341103162, |
|
"grad_norm": 0.429360568523407, |
|
"learning_rate": 8.507527952170771e-05, |
|
"loss": 2.1192, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 0.3652204015693515, |
|
"grad_norm": 0.4052377939224243, |
|
"learning_rate": 8.499921218601358e-05, |
|
"loss": 2.1094, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 0.3657973690283868, |
|
"grad_norm": 0.2919637858867645, |
|
"learning_rate": 8.49229856901367e-05, |
|
"loss": 2.1169, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 0.3663743364874221, |
|
"grad_norm": 0.44187116622924805, |
|
"learning_rate": 8.484660038071981e-05, |
|
"loss": 2.101, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.3669513039464574, |
|
"grad_norm": 0.4569217264652252, |
|
"learning_rate": 8.477005660512787e-05, |
|
"loss": 2.1241, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 0.3675282714054927, |
|
"grad_norm": 0.40709832310676575, |
|
"learning_rate": 8.469335471144646e-05, |
|
"loss": 2.1134, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 0.36810523886452806, |
|
"grad_norm": 0.289991170167923, |
|
"learning_rate": 8.46164950484802e-05, |
|
"loss": 2.1119, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 0.36868220632356336, |
|
"grad_norm": 0.30788782238960266, |
|
"learning_rate": 8.453947796575122e-05, |
|
"loss": 2.1171, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 0.36925917378259865, |
|
"grad_norm": 0.26853644847869873, |
|
"learning_rate": 8.446230381349745e-05, |
|
"loss": 2.1118, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.369836141241634, |
|
"grad_norm": 0.3687596023082733, |
|
"learning_rate": 8.438497294267117e-05, |
|
"loss": 2.1341, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 0.3704131087006693, |
|
"grad_norm": 0.4256872534751892, |
|
"learning_rate": 8.430748570493729e-05, |
|
"loss": 2.1198, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 0.3709900761597046, |
|
"grad_norm": 0.27723926305770874, |
|
"learning_rate": 8.422984245267184e-05, |
|
"loss": 2.1193, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 0.3715670436187399, |
|
"grad_norm": 0.3537181317806244, |
|
"learning_rate": 8.415204353896031e-05, |
|
"loss": 2.1111, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 0.3721440110777752, |
|
"grad_norm": 0.30080363154411316, |
|
"learning_rate": 8.407408931759607e-05, |
|
"loss": 2.0906, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.3727209785368105, |
|
"grad_norm": 0.3747290074825287, |
|
"learning_rate": 8.399598014307877e-05, |
|
"loss": 2.1012, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 0.3732979459958458, |
|
"grad_norm": 0.5772930383682251, |
|
"learning_rate": 8.391771637061268e-05, |
|
"loss": 2.1125, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 0.37387491345488116, |
|
"grad_norm": 0.5180211067199707, |
|
"learning_rate": 8.383929835610516e-05, |
|
"loss": 2.102, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 0.37445188091391646, |
|
"grad_norm": 0.5194865465164185, |
|
"learning_rate": 8.376072645616496e-05, |
|
"loss": 2.1105, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 0.37502884837295175, |
|
"grad_norm": 0.2418607920408249, |
|
"learning_rate": 8.368200102810065e-05, |
|
"loss": 2.1099, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.3756058158319871, |
|
"grad_norm": 0.29503995180130005, |
|
"learning_rate": 8.360312242991892e-05, |
|
"loss": 2.1044, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 0.3761827832910224, |
|
"grad_norm": 0.3843310475349426, |
|
"learning_rate": 8.352409102032308e-05, |
|
"loss": 2.1014, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 0.3767597507500577, |
|
"grad_norm": 0.2821659743785858, |
|
"learning_rate": 8.344490715871132e-05, |
|
"loss": 2.1056, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 0.377336718209093, |
|
"grad_norm": 0.28183305263519287, |
|
"learning_rate": 8.336557120517509e-05, |
|
"loss": 2.0959, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 0.37791368566812833, |
|
"grad_norm": 0.27406924962997437, |
|
"learning_rate": 8.328608352049754e-05, |
|
"loss": 2.1111, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.3784906531271636, |
|
"grad_norm": 0.41145142912864685, |
|
"learning_rate": 8.320644446615175e-05, |
|
"loss": 2.1011, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 0.3790676205861989, |
|
"grad_norm": 0.368637353181839, |
|
"learning_rate": 8.312665440429925e-05, |
|
"loss": 2.1185, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 0.37964458804523427, |
|
"grad_norm": 0.42195016145706177, |
|
"learning_rate": 8.304671369778821e-05, |
|
"loss": 2.1148, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 0.38022155550426956, |
|
"grad_norm": 0.3770730793476105, |
|
"learning_rate": 8.296662271015185e-05, |
|
"loss": 2.1128, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 0.38079852296330485, |
|
"grad_norm": 0.31544458866119385, |
|
"learning_rate": 8.288638180560686e-05, |
|
"loss": 2.1162, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.3813754904223402, |
|
"grad_norm": 0.3706103265285492, |
|
"learning_rate": 8.280599134905167e-05, |
|
"loss": 2.1006, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 0.3819524578813755, |
|
"grad_norm": 0.4784095585346222, |
|
"learning_rate": 8.272545170606476e-05, |
|
"loss": 2.1131, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 0.3825294253404108, |
|
"grad_norm": 0.5787613987922668, |
|
"learning_rate": 8.264476324290309e-05, |
|
"loss": 2.1004, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 0.38310639279944614, |
|
"grad_norm": 0.5983478426933289, |
|
"learning_rate": 8.256392632650034e-05, |
|
"loss": 2.1085, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 0.38368336025848143, |
|
"grad_norm": 0.302979439496994, |
|
"learning_rate": 8.248294132446533e-05, |
|
"loss": 2.1015, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.3842603277175167, |
|
"grad_norm": 0.3463706970214844, |
|
"learning_rate": 8.240180860508027e-05, |
|
"loss": 2.101, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 0.384837295176552, |
|
"grad_norm": 0.32216259837150574, |
|
"learning_rate": 8.232052853729913e-05, |
|
"loss": 2.1197, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 0.38541426263558737, |
|
"grad_norm": 0.30237728357315063, |
|
"learning_rate": 8.223910149074598e-05, |
|
"loss": 2.098, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 0.38599123009462266, |
|
"grad_norm": 0.4659302532672882, |
|
"learning_rate": 8.215752783571325e-05, |
|
"loss": 2.1121, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 0.38656819755365796, |
|
"grad_norm": 0.314649373292923, |
|
"learning_rate": 8.207580794316009e-05, |
|
"loss": 2.0945, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.3871451650126933, |
|
"grad_norm": 0.29076725244522095, |
|
"learning_rate": 8.199394218471065e-05, |
|
"loss": 2.1033, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 0.3877221324717286, |
|
"grad_norm": 0.3016183376312256, |
|
"learning_rate": 8.191193093265241e-05, |
|
"loss": 2.0942, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 0.3882990999307639, |
|
"grad_norm": 0.29181423783302307, |
|
"learning_rate": 8.182977455993456e-05, |
|
"loss": 2.1083, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 0.38887606738979924, |
|
"grad_norm": 0.3460990786552429, |
|
"learning_rate": 8.174747344016613e-05, |
|
"loss": 2.0995, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 0.38945303484883453, |
|
"grad_norm": 0.2913837134838104, |
|
"learning_rate": 8.166502794761445e-05, |
|
"loss": 2.111, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.3900300023078698, |
|
"grad_norm": 0.3307998478412628, |
|
"learning_rate": 8.158243845720338e-05, |
|
"loss": 2.1161, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 0.3906069697669051, |
|
"grad_norm": 0.2763582766056061, |
|
"learning_rate": 8.149970534451161e-05, |
|
"loss": 2.1044, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 0.39118393722594047, |
|
"grad_norm": 0.2793087065219879, |
|
"learning_rate": 8.141682898577096e-05, |
|
"loss": 2.1016, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 0.39176090468497576, |
|
"grad_norm": 0.3368696868419647, |
|
"learning_rate": 8.133380975786469e-05, |
|
"loss": 2.1022, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 0.39233787214401106, |
|
"grad_norm": 0.3350260257720947, |
|
"learning_rate": 8.125064803832569e-05, |
|
"loss": 2.0958, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.3929148396030464, |
|
"grad_norm": 0.2954477369785309, |
|
"learning_rate": 8.116734420533493e-05, |
|
"loss": 2.1082, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 0.3934918070620817, |
|
"grad_norm": 0.302860289812088, |
|
"learning_rate": 8.108389863771959e-05, |
|
"loss": 2.0965, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 0.394068774521117, |
|
"grad_norm": 0.34222808480262756, |
|
"learning_rate": 8.100031171495142e-05, |
|
"loss": 2.1034, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 0.39464574198015234, |
|
"grad_norm": 0.27451932430267334, |
|
"learning_rate": 8.091658381714493e-05, |
|
"loss": 2.1104, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 0.39522270943918764, |
|
"grad_norm": 0.40477854013442993, |
|
"learning_rate": 8.083271532505581e-05, |
|
"loss": 2.0989, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.39579967689822293, |
|
"grad_norm": 0.3276130259037018, |
|
"learning_rate": 8.074870662007903e-05, |
|
"loss": 2.1041, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 0.3963766443572582, |
|
"grad_norm": 0.3322683572769165, |
|
"learning_rate": 8.066455808424722e-05, |
|
"loss": 2.1114, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 0.3969536118162936, |
|
"grad_norm": 0.29346489906311035, |
|
"learning_rate": 8.05802701002289e-05, |
|
"loss": 2.1009, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 0.39753057927532887, |
|
"grad_norm": 0.275181382894516, |
|
"learning_rate": 8.049584305132673e-05, |
|
"loss": 2.0983, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 0.39810754673436416, |
|
"grad_norm": 0.2939784526824951, |
|
"learning_rate": 8.041127732147573e-05, |
|
"loss": 2.1173, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.3986845141933995, |
|
"grad_norm": 0.3622361719608307, |
|
"learning_rate": 8.032657329524165e-05, |
|
"loss": 2.092, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 0.3992614816524348, |
|
"grad_norm": 0.4047354459762573, |
|
"learning_rate": 8.02417313578191e-05, |
|
"loss": 2.1023, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 0.3998384491114701, |
|
"grad_norm": 0.301595538854599, |
|
"learning_rate": 8.015675189502986e-05, |
|
"loss": 2.105, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 0.40041541657050544, |
|
"grad_norm": 0.39583104848861694, |
|
"learning_rate": 8.007163529332111e-05, |
|
"loss": 2.107, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 0.40099238402954074, |
|
"grad_norm": 0.26828697323799133, |
|
"learning_rate": 7.998638193976366e-05, |
|
"loss": 2.1087, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.40156935148857603, |
|
"grad_norm": 0.35258111357688904, |
|
"learning_rate": 7.990099222205023e-05, |
|
"loss": 2.1052, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 0.4021463189476114, |
|
"grad_norm": 0.3397604823112488, |
|
"learning_rate": 7.981546652849365e-05, |
|
"loss": 2.0931, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 0.4027232864066467, |
|
"grad_norm": 0.29654258489608765, |
|
"learning_rate": 7.972980524802509e-05, |
|
"loss": 2.0865, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 0.40330025386568197, |
|
"grad_norm": 0.30216071009635925, |
|
"learning_rate": 7.964400877019233e-05, |
|
"loss": 2.0926, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 0.40387722132471726, |
|
"grad_norm": 0.29403412342071533, |
|
"learning_rate": 7.955807748515797e-05, |
|
"loss": 2.1122, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.4044541887837526, |
|
"grad_norm": 0.3340148329734802, |
|
"learning_rate": 7.947201178369761e-05, |
|
"loss": 2.1019, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 0.4050311562427879, |
|
"grad_norm": 0.28326958417892456, |
|
"learning_rate": 7.938581205719815e-05, |
|
"loss": 2.089, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 0.4056081237018232, |
|
"grad_norm": 0.2603297829627991, |
|
"learning_rate": 7.929947869765596e-05, |
|
"loss": 2.0979, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 0.40618509116085855, |
|
"grad_norm": 0.375947505235672, |
|
"learning_rate": 7.92130120976751e-05, |
|
"loss": 2.1027, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.40676205861989384, |
|
"grad_norm": 0.29024848341941833, |
|
"learning_rate": 7.912641265046561e-05, |
|
"loss": 2.0944, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.40733902607892913, |
|
"grad_norm": 0.27378323674201965, |
|
"learning_rate": 7.903968074984155e-05, |
|
"loss": 2.0983, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 0.4079159935379645, |
|
"grad_norm": 0.38356778025627136, |
|
"learning_rate": 7.89528167902194e-05, |
|
"loss": 2.1056, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 0.4084929609969998, |
|
"grad_norm": 0.38269197940826416, |
|
"learning_rate": 7.886582116661615e-05, |
|
"loss": 2.098, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 0.40906992845603507, |
|
"grad_norm": 0.4965763986110687, |
|
"learning_rate": 7.877869427464755e-05, |
|
"loss": 2.1038, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 0.40964689591507036, |
|
"grad_norm": 0.28945809602737427, |
|
"learning_rate": 7.869143651052629e-05, |
|
"loss": 2.0898, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.4102238633741057, |
|
"grad_norm": 0.30477508902549744, |
|
"learning_rate": 7.86040482710602e-05, |
|
"loss": 2.1105, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 0.410800830833141, |
|
"grad_norm": 0.2908862829208374, |
|
"learning_rate": 7.851652995365044e-05, |
|
"loss": 2.1111, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 0.4113777982921763, |
|
"grad_norm": 0.3085802495479584, |
|
"learning_rate": 7.842888195628977e-05, |
|
"loss": 2.0996, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 0.41195476575121165, |
|
"grad_norm": 0.3265917897224426, |
|
"learning_rate": 7.834110467756059e-05, |
|
"loss": 2.0904, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 0.41253173321024694, |
|
"grad_norm": 0.37845543026924133, |
|
"learning_rate": 7.825319851663328e-05, |
|
"loss": 2.0892, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.41310870066928224, |
|
"grad_norm": 0.3401089906692505, |
|
"learning_rate": 7.816516387326425e-05, |
|
"loss": 2.1042, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 0.4136856681283176, |
|
"grad_norm": 0.28092026710510254, |
|
"learning_rate": 7.807700114779424e-05, |
|
"loss": 2.0867, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 0.4142626355873529, |
|
"grad_norm": 0.3313417434692383, |
|
"learning_rate": 7.798871074114642e-05, |
|
"loss": 2.0914, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 0.4148396030463882, |
|
"grad_norm": 0.3220118582248688, |
|
"learning_rate": 7.790029305482464e-05, |
|
"loss": 2.1001, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 0.4154165705054235, |
|
"grad_norm": 0.4763105809688568, |
|
"learning_rate": 7.781174849091147e-05, |
|
"loss": 2.0942, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.4159935379644588, |
|
"grad_norm": 0.29946181178092957, |
|
"learning_rate": 7.772307745206657e-05, |
|
"loss": 2.104, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 0.4165705054234941, |
|
"grad_norm": 0.3090571165084839, |
|
"learning_rate": 7.763428034152466e-05, |
|
"loss": 2.0946, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 0.4171474728825294, |
|
"grad_norm": 0.34957677125930786, |
|
"learning_rate": 7.754535756309383e-05, |
|
"loss": 2.1011, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 0.41772444034156475, |
|
"grad_norm": 0.3043264150619507, |
|
"learning_rate": 7.745630952115363e-05, |
|
"loss": 2.1069, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 0.41830140780060004, |
|
"grad_norm": 0.30251699686050415, |
|
"learning_rate": 7.736713662065322e-05, |
|
"loss": 2.0902, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.41887837525963534, |
|
"grad_norm": 0.3313656449317932, |
|
"learning_rate": 7.727783926710962e-05, |
|
"loss": 2.1037, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 0.4194553427186707, |
|
"grad_norm": 0.4356120526790619, |
|
"learning_rate": 7.718841786660577e-05, |
|
"loss": 2.1033, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 0.420032310177706, |
|
"grad_norm": 0.27830976247787476, |
|
"learning_rate": 7.709887282578871e-05, |
|
"loss": 2.0909, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 0.4206092776367413, |
|
"grad_norm": 0.32721787691116333, |
|
"learning_rate": 7.700920455186777e-05, |
|
"loss": 2.0885, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 0.4211862450957766, |
|
"grad_norm": 0.29251882433891296, |
|
"learning_rate": 7.691941345261267e-05, |
|
"loss": 2.1043, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.4217632125548119, |
|
"grad_norm": 0.3718348443508148, |
|
"learning_rate": 7.682949993635164e-05, |
|
"loss": 2.0776, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 0.4223401800138472, |
|
"grad_norm": 0.3022097945213318, |
|
"learning_rate": 7.673946441196967e-05, |
|
"loss": 2.0869, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 0.4229171474728825, |
|
"grad_norm": 0.35751357674598694, |
|
"learning_rate": 7.664930728890656e-05, |
|
"loss": 2.096, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 0.42349411493191785, |
|
"grad_norm": 0.2946200966835022, |
|
"learning_rate": 7.655902897715509e-05, |
|
"loss": 2.0876, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 0.42407108239095315, |
|
"grad_norm": 0.41840797662734985, |
|
"learning_rate": 7.646862988725916e-05, |
|
"loss": 2.0929, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.42464804984998844, |
|
"grad_norm": 0.3791830539703369, |
|
"learning_rate": 7.637811043031186e-05, |
|
"loss": 2.0857, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 0.4252250173090238, |
|
"grad_norm": 0.31222841143608093, |
|
"learning_rate": 7.628747101795374e-05, |
|
"loss": 2.0847, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 0.4258019847680591, |
|
"grad_norm": 0.33404964208602905, |
|
"learning_rate": 7.619671206237078e-05, |
|
"loss": 2.0947, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 0.4263789522270944, |
|
"grad_norm": 0.40400052070617676, |
|
"learning_rate": 7.610583397629261e-05, |
|
"loss": 2.0963, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 0.4269559196861297, |
|
"grad_norm": 0.282111257314682, |
|
"learning_rate": 7.601483717299062e-05, |
|
"loss": 2.0824, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.427532887145165, |
|
"grad_norm": 0.38018685579299927, |
|
"learning_rate": 7.592372206627605e-05, |
|
"loss": 2.1036, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 0.4281098546042003, |
|
"grad_norm": 0.3515368700027466, |
|
"learning_rate": 7.583248907049817e-05, |
|
"loss": 2.0904, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 0.4286868220632356, |
|
"grad_norm": 0.3954324424266815, |
|
"learning_rate": 7.57411386005423e-05, |
|
"loss": 2.0919, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 0.42926378952227096, |
|
"grad_norm": 0.44705700874328613, |
|
"learning_rate": 7.564967107182801e-05, |
|
"loss": 2.0923, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 0.42984075698130625, |
|
"grad_norm": 0.2849537432193756, |
|
"learning_rate": 7.555808690030719e-05, |
|
"loss": 2.0977, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.43041772444034154, |
|
"grad_norm": 0.356541246175766, |
|
"learning_rate": 7.546638650246218e-05, |
|
"loss": 2.1011, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 0.4309946918993769, |
|
"grad_norm": 0.30027782917022705, |
|
"learning_rate": 7.537457029530386e-05, |
|
"loss": 2.0788, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 0.4315716593584122, |
|
"grad_norm": 0.39448121190071106, |
|
"learning_rate": 7.528263869636976e-05, |
|
"loss": 2.0994, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 0.4321486268174475, |
|
"grad_norm": 0.2679554224014282, |
|
"learning_rate": 7.51905921237221e-05, |
|
"loss": 2.0924, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 0.43272559427648283, |
|
"grad_norm": 0.47223103046417236, |
|
"learning_rate": 7.509843099594606e-05, |
|
"loss": 2.0859, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.4333025617355181, |
|
"grad_norm": 0.4030632972717285, |
|
"learning_rate": 7.500615573214766e-05, |
|
"loss": 2.095, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 0.4338795291945534, |
|
"grad_norm": 0.2585602402687073, |
|
"learning_rate": 7.491376675195199e-05, |
|
"loss": 2.0963, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 0.43445649665358876, |
|
"grad_norm": 0.24894532561302185, |
|
"learning_rate": 7.482126447550132e-05, |
|
"loss": 2.0907, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 0.43503346411262406, |
|
"grad_norm": 0.30701470375061035, |
|
"learning_rate": 7.472864932345307e-05, |
|
"loss": 2.0904, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 0.43561043157165935, |
|
"grad_norm": 0.303641676902771, |
|
"learning_rate": 7.4635921716978e-05, |
|
"loss": 2.0969, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.43618739903069464, |
|
"grad_norm": 0.2713119685649872, |
|
"learning_rate": 7.454308207775824e-05, |
|
"loss": 2.0691, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 0.43676436648973, |
|
"grad_norm": 0.3004838526248932, |
|
"learning_rate": 7.445013082798542e-05, |
|
"loss": 2.0791, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 0.4373413339487653, |
|
"grad_norm": 0.31855785846710205, |
|
"learning_rate": 7.435706839035869e-05, |
|
"loss": 2.0921, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 0.4379183014078006, |
|
"grad_norm": 0.2251572161912918, |
|
"learning_rate": 7.426389518808287e-05, |
|
"loss": 2.0784, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 0.43849526886683593, |
|
"grad_norm": 0.33400654792785645, |
|
"learning_rate": 7.417061164486644e-05, |
|
"loss": 2.0949, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.4390722363258712, |
|
"grad_norm": 0.27124184370040894, |
|
"learning_rate": 7.407721818491975e-05, |
|
"loss": 2.0815, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 0.4396492037849065, |
|
"grad_norm": 0.3008407950401306, |
|
"learning_rate": 7.398371523295286e-05, |
|
"loss": 2.0848, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 0.44022617124394187, |
|
"grad_norm": 0.2795826196670532, |
|
"learning_rate": 7.389010321417385e-05, |
|
"loss": 2.0879, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 0.44080313870297716, |
|
"grad_norm": 0.2528461515903473, |
|
"learning_rate": 7.379638255428677e-05, |
|
"loss": 2.0914, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 0.44138010616201245, |
|
"grad_norm": 0.5015778541564941, |
|
"learning_rate": 7.370255367948971e-05, |
|
"loss": 2.0744, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.44195707362104775, |
|
"grad_norm": 0.267972856760025, |
|
"learning_rate": 7.360861701647287e-05, |
|
"loss": 2.0914, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 0.4425340410800831, |
|
"grad_norm": 0.31362441182136536, |
|
"learning_rate": 7.351457299241659e-05, |
|
"loss": 2.0884, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 0.4431110085391184, |
|
"grad_norm": 0.2507345378398895, |
|
"learning_rate": 7.342042203498951e-05, |
|
"loss": 2.0877, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.4436879759981537, |
|
"grad_norm": 0.26686784625053406, |
|
"learning_rate": 7.33261645723465e-05, |
|
"loss": 2.0786, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 0.44426494345718903, |
|
"grad_norm": 0.2581067681312561, |
|
"learning_rate": 7.323180103312677e-05, |
|
"loss": 2.09, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.4448419109162243, |
|
"grad_norm": 0.27772027254104614, |
|
"learning_rate": 7.313733184645193e-05, |
|
"loss": 2.0982, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 0.4454188783752596, |
|
"grad_norm": 0.3083910048007965, |
|
"learning_rate": 7.304275744192402e-05, |
|
"loss": 2.0919, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 0.44599584583429497, |
|
"grad_norm": 0.29059112071990967, |
|
"learning_rate": 7.294807824962358e-05, |
|
"loss": 2.0845, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 0.44657281329333026, |
|
"grad_norm": 0.3928695321083069, |
|
"learning_rate": 7.285329470010765e-05, |
|
"loss": 2.0792, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 0.44714978075236556, |
|
"grad_norm": 0.379013329744339, |
|
"learning_rate": 7.275840722440784e-05, |
|
"loss": 2.0873, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.4477267482114009, |
|
"grad_norm": 0.2709653079509735, |
|
"learning_rate": 7.266341625402836e-05, |
|
"loss": 2.0761, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 0.4483037156704362, |
|
"grad_norm": 0.2680635452270508, |
|
"learning_rate": 7.256832222094413e-05, |
|
"loss": 2.0824, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 0.4488806831294715, |
|
"grad_norm": 0.22318679094314575, |
|
"learning_rate": 7.247312555759867e-05, |
|
"loss": 2.108, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 0.4494576505885068, |
|
"grad_norm": 0.27637118101119995, |
|
"learning_rate": 7.237782669690226e-05, |
|
"loss": 2.091, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 0.45003461804754213, |
|
"grad_norm": 0.24532774090766907, |
|
"learning_rate": 7.22824260722299e-05, |
|
"loss": 2.0868, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.45061158550657743, |
|
"grad_norm": 0.3002353310585022, |
|
"learning_rate": 7.21869241174194e-05, |
|
"loss": 2.0886, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 0.4511885529656127, |
|
"grad_norm": 0.2682434618473053, |
|
"learning_rate": 7.209132126676934e-05, |
|
"loss": 2.081, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 0.45176552042464807, |
|
"grad_norm": 0.35407841205596924, |
|
"learning_rate": 7.199561795503715e-05, |
|
"loss": 2.0778, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 0.45234248788368336, |
|
"grad_norm": 0.27851682901382446, |
|
"learning_rate": 7.189981461743706e-05, |
|
"loss": 2.0962, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 0.45291945534271866, |
|
"grad_norm": 0.30708226561546326, |
|
"learning_rate": 7.180391168963828e-05, |
|
"loss": 2.0771, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.453496422801754, |
|
"grad_norm": 0.2830917537212372, |
|
"learning_rate": 7.170790960776279e-05, |
|
"loss": 2.0888, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 0.4540733902607893, |
|
"grad_norm": 0.4283241629600525, |
|
"learning_rate": 7.161180880838355e-05, |
|
"loss": 2.0811, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 0.4546503577198246, |
|
"grad_norm": 0.48016196489334106, |
|
"learning_rate": 7.15156097285224e-05, |
|
"loss": 2.0915, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 0.4552273251788599, |
|
"grad_norm": 0.2744823396205902, |
|
"learning_rate": 7.141931280564815e-05, |
|
"loss": 2.0774, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 0.45580429263789524, |
|
"grad_norm": 0.2494378238916397, |
|
"learning_rate": 7.132291847767455e-05, |
|
"loss": 2.0788, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.45638126009693053, |
|
"grad_norm": 0.2590774893760681, |
|
"learning_rate": 7.122642718295826e-05, |
|
"loss": 2.0733, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 0.4569582275559658, |
|
"grad_norm": 0.37552034854888916, |
|
"learning_rate": 7.112983936029694e-05, |
|
"loss": 2.0732, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 0.4575351950150012, |
|
"grad_norm": 0.3342891037464142, |
|
"learning_rate": 7.103315544892723e-05, |
|
"loss": 2.0859, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 0.45811216247403647, |
|
"grad_norm": 0.31473156809806824, |
|
"learning_rate": 7.093637588852267e-05, |
|
"loss": 2.0821, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 0.45868912993307176, |
|
"grad_norm": 0.3879741132259369, |
|
"learning_rate": 7.083950111919187e-05, |
|
"loss": 2.0668, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.4592660973921071, |
|
"grad_norm": 0.2961817979812622, |
|
"learning_rate": 7.074253158147629e-05, |
|
"loss": 2.0782, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 0.4598430648511424, |
|
"grad_norm": 0.2857101559638977, |
|
"learning_rate": 7.064546771634844e-05, |
|
"loss": 2.0788, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 0.4604200323101777, |
|
"grad_norm": 0.3533235788345337, |
|
"learning_rate": 7.054830996520974e-05, |
|
"loss": 2.0796, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 0.460996999769213, |
|
"grad_norm": 0.3479328453540802, |
|
"learning_rate": 7.045105876988858e-05, |
|
"loss": 2.073, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 0.46157396722824834, |
|
"grad_norm": 0.3560927212238312, |
|
"learning_rate": 7.035371457263828e-05, |
|
"loss": 2.0701, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.46215093468728363, |
|
"grad_norm": 0.48714378476142883, |
|
"learning_rate": 7.02562778161351e-05, |
|
"loss": 2.078, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 0.4627279021463189, |
|
"grad_norm": 0.2729594111442566, |
|
"learning_rate": 7.015874894347622e-05, |
|
"loss": 2.0734, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 0.4633048696053543, |
|
"grad_norm": 0.3087742328643799, |
|
"learning_rate": 7.006112839817772e-05, |
|
"loss": 2.0764, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 0.46388183706438957, |
|
"grad_norm": 0.34214138984680176, |
|
"learning_rate": 6.996341662417255e-05, |
|
"loss": 2.071, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 0.46445880452342486, |
|
"grad_norm": 0.3290503919124603, |
|
"learning_rate": 6.986561406580853e-05, |
|
"loss": 2.0635, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.4650357719824602, |
|
"grad_norm": 0.2952798008918762, |
|
"learning_rate": 6.976772116784634e-05, |
|
"loss": 2.0825, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 0.4656127394414955, |
|
"grad_norm": 0.3497956693172455, |
|
"learning_rate": 6.966973837545747e-05, |
|
"loss": 2.0729, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 0.4661897069005308, |
|
"grad_norm": 0.3718845546245575, |
|
"learning_rate": 6.957166613422219e-05, |
|
"loss": 2.0671, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 0.46676667435956615, |
|
"grad_norm": 0.24571186304092407, |
|
"learning_rate": 6.947350489012761e-05, |
|
"loss": 2.076, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 0.46734364181860144, |
|
"grad_norm": 0.26415371894836426, |
|
"learning_rate": 6.93752550895655e-05, |
|
"loss": 2.0814, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.46792060927763673, |
|
"grad_norm": 0.3642018735408783, |
|
"learning_rate": 6.927691717933039e-05, |
|
"loss": 2.0691, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 0.46849757673667203, |
|
"grad_norm": 0.42086517810821533, |
|
"learning_rate": 6.917849160661747e-05, |
|
"loss": 2.0752, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 0.4690745441957074, |
|
"grad_norm": 0.29952847957611084, |
|
"learning_rate": 6.907997881902058e-05, |
|
"loss": 2.0783, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 0.46965151165474267, |
|
"grad_norm": 0.3071637451648712, |
|
"learning_rate": 6.898137926453023e-05, |
|
"loss": 2.066, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 0.47022847911377796, |
|
"grad_norm": 0.2822725474834442, |
|
"learning_rate": 6.888269339153144e-05, |
|
"loss": 2.0866, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.4708054465728133, |
|
"grad_norm": 0.32353177666664124, |
|
"learning_rate": 6.878392164880175e-05, |
|
"loss": 2.0795, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 0.4713824140318486, |
|
"grad_norm": 0.2756877839565277, |
|
"learning_rate": 6.868506448550928e-05, |
|
"loss": 2.0911, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 0.4719593814908839, |
|
"grad_norm": 0.29774340987205505, |
|
"learning_rate": 6.858612235121054e-05, |
|
"loss": 2.0805, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 0.47253634894991925, |
|
"grad_norm": 0.2543671727180481, |
|
"learning_rate": 6.848709569584842e-05, |
|
"loss": 2.077, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 0.47311331640895454, |
|
"grad_norm": 0.6061549782752991, |
|
"learning_rate": 6.838798496975027e-05, |
|
"loss": 2.0623, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.47369028386798984, |
|
"grad_norm": 0.26145073771476746, |
|
"learning_rate": 6.82887906236257e-05, |
|
"loss": 2.0843, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 0.47426725132702513, |
|
"grad_norm": 0.30414700508117676, |
|
"learning_rate": 6.818951310856456e-05, |
|
"loss": 2.0652, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 0.4748442187860605, |
|
"grad_norm": 0.2926621735095978, |
|
"learning_rate": 6.809015287603493e-05, |
|
"loss": 2.0845, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 0.4754211862450958, |
|
"grad_norm": 0.27972277998924255, |
|
"learning_rate": 6.799071037788108e-05, |
|
"loss": 2.0572, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 0.47599815370413107, |
|
"grad_norm": 0.3105740249156952, |
|
"learning_rate": 6.789118606632134e-05, |
|
"loss": 2.0678, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.4765751211631664, |
|
"grad_norm": 0.32389092445373535, |
|
"learning_rate": 6.779158039394616e-05, |
|
"loss": 2.0793, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 0.4771520886222017, |
|
"grad_norm": 0.3562007248401642, |
|
"learning_rate": 6.769189381371591e-05, |
|
"loss": 2.0802, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 0.477729056081237, |
|
"grad_norm": 0.2581557035446167, |
|
"learning_rate": 6.759212677895893e-05, |
|
"loss": 2.0635, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 0.47830602354027235, |
|
"grad_norm": 0.24171455204486847, |
|
"learning_rate": 6.749227974336938e-05, |
|
"loss": 2.07, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 0.47888299099930765, |
|
"grad_norm": 0.2817774713039398, |
|
"learning_rate": 6.739235316100532e-05, |
|
"loss": 2.0836, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.47945995845834294, |
|
"grad_norm": 0.2664335370063782, |
|
"learning_rate": 6.729234748628643e-05, |
|
"loss": 2.0824, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 0.48003692591737823, |
|
"grad_norm": 0.3076491057872772, |
|
"learning_rate": 6.719226317399219e-05, |
|
"loss": 2.0858, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 0.4806138933764136, |
|
"grad_norm": 0.37371984124183655, |
|
"learning_rate": 6.709210067925961e-05, |
|
"loss": 2.075, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 0.4811908608354489, |
|
"grad_norm": 0.29041746258735657, |
|
"learning_rate": 6.699186045758124e-05, |
|
"loss": 2.0636, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 0.48176782829448417, |
|
"grad_norm": 0.3501351773738861, |
|
"learning_rate": 6.689154296480313e-05, |
|
"loss": 2.0737, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.4823447957535195, |
|
"grad_norm": 0.3749564588069916, |
|
"learning_rate": 6.679114865712271e-05, |
|
"loss": 2.075, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 0.4829217632125548, |
|
"grad_norm": 0.364692747592926, |
|
"learning_rate": 6.669067799108671e-05, |
|
"loss": 2.0708, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 0.4834987306715901, |
|
"grad_norm": 0.34302058815956116, |
|
"learning_rate": 6.659013142358912e-05, |
|
"loss": 2.0702, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 0.48407569813062545, |
|
"grad_norm": 0.33347088098526, |
|
"learning_rate": 6.648950941186909e-05, |
|
"loss": 2.0759, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 0.48465266558966075, |
|
"grad_norm": 0.2980937063694, |
|
"learning_rate": 6.638881241350884e-05, |
|
"loss": 2.0607, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.48522963304869604, |
|
"grad_norm": 0.2834320664405823, |
|
"learning_rate": 6.628804088643165e-05, |
|
"loss": 2.0604, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 0.4858066005077314, |
|
"grad_norm": 0.2671113908290863, |
|
"learning_rate": 6.618719528889966e-05, |
|
"loss": 2.0687, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 0.4863835679667667, |
|
"grad_norm": 0.4396528899669647, |
|
"learning_rate": 6.608627607951185e-05, |
|
"loss": 2.0689, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 0.486960535425802, |
|
"grad_norm": 0.31399500370025635, |
|
"learning_rate": 6.598528371720201e-05, |
|
"loss": 2.0692, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 0.48753750288483727, |
|
"grad_norm": 0.23927593231201172, |
|
"learning_rate": 6.588421866123653e-05, |
|
"loss": 2.0836, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.4881144703438726, |
|
"grad_norm": 0.32067403197288513, |
|
"learning_rate": 6.578308137121242e-05, |
|
"loss": 2.0747, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 0.4886914378029079, |
|
"grad_norm": 0.25653791427612305, |
|
"learning_rate": 6.56818723070552e-05, |
|
"loss": 2.0697, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 0.4892684052619432, |
|
"grad_norm": 0.35823753476142883, |
|
"learning_rate": 6.558059192901667e-05, |
|
"loss": 2.0654, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 0.48984537272097856, |
|
"grad_norm": 0.3746614158153534, |
|
"learning_rate": 6.547924069767304e-05, |
|
"loss": 2.0759, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 0.49042234018001385, |
|
"grad_norm": 0.284355491399765, |
|
"learning_rate": 6.537781907392274e-05, |
|
"loss": 2.0752, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.49099930763904914, |
|
"grad_norm": 0.5059247016906738, |
|
"learning_rate": 6.527632751898423e-05, |
|
"loss": 2.0586, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 0.4915762750980845, |
|
"grad_norm": 0.3632018566131592, |
|
"learning_rate": 6.517476649439403e-05, |
|
"loss": 2.0768, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 0.4921532425571198, |
|
"grad_norm": 0.309618204832077, |
|
"learning_rate": 6.50731364620046e-05, |
|
"loss": 2.0766, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 0.4927302100161551, |
|
"grad_norm": 0.48799097537994385, |
|
"learning_rate": 6.497143788398215e-05, |
|
"loss": 2.0806, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 0.4933071774751904, |
|
"grad_norm": 0.25191518664360046, |
|
"learning_rate": 6.486967122280462e-05, |
|
"loss": 2.0661, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.4938841449342257, |
|
"grad_norm": 0.2887539565563202, |
|
"learning_rate": 6.476783694125964e-05, |
|
"loss": 2.0657, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 0.494461112393261, |
|
"grad_norm": 0.2615150213241577, |
|
"learning_rate": 6.466593550244227e-05, |
|
"loss": 2.0777, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 0.4950380798522963, |
|
"grad_norm": 0.3972332179546356, |
|
"learning_rate": 6.456396736975297e-05, |
|
"loss": 2.0726, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 0.49561504731133166, |
|
"grad_norm": 0.2527362108230591, |
|
"learning_rate": 6.446193300689553e-05, |
|
"loss": 2.0695, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 0.49619201477036695, |
|
"grad_norm": 0.27918368577957153, |
|
"learning_rate": 6.435983287787489e-05, |
|
"loss": 2.0726, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.49676898222940225, |
|
"grad_norm": 0.34241583943367004, |
|
"learning_rate": 6.425766744699507e-05, |
|
"loss": 2.0653, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 0.4973459496884376, |
|
"grad_norm": 0.33931684494018555, |
|
"learning_rate": 6.415543717885707e-05, |
|
"loss": 2.0674, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 0.4979229171474729, |
|
"grad_norm": 0.2961905598640442, |
|
"learning_rate": 6.405314253835675e-05, |
|
"loss": 2.057, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 0.4984998846065082, |
|
"grad_norm": 0.405836284160614, |
|
"learning_rate": 6.395078399068264e-05, |
|
"loss": 2.0448, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 0.49907685206554353, |
|
"grad_norm": 0.3253948986530304, |
|
"learning_rate": 6.384836200131398e-05, |
|
"loss": 2.0713, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.4996538195245788, |
|
"grad_norm": 0.3465583920478821, |
|
"learning_rate": 6.374587703601846e-05, |
|
"loss": 2.076, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 0.5002307869836141, |
|
"grad_norm": 0.25997796654701233, |
|
"learning_rate": 6.364332956085014e-05, |
|
"loss": 2.0584, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 0.5008077544426495, |
|
"grad_norm": 0.31124719977378845, |
|
"learning_rate": 6.35407200421474e-05, |
|
"loss": 2.0722, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 0.5013847219016847, |
|
"grad_norm": 0.41811326146125793, |
|
"learning_rate": 6.343804894653072e-05, |
|
"loss": 2.0436, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 0.50196168936072, |
|
"grad_norm": 0.3640015423297882, |
|
"learning_rate": 6.333531674090061e-05, |
|
"loss": 2.0728, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.5025386568197554, |
|
"grad_norm": 0.22485965490341187, |
|
"learning_rate": 6.323252389243553e-05, |
|
"loss": 2.0681, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 0.5031156242787906, |
|
"grad_norm": 0.2506142854690552, |
|
"learning_rate": 6.312967086858963e-05, |
|
"loss": 2.0669, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 0.503692591737826, |
|
"grad_norm": 0.3741120398044586, |
|
"learning_rate": 6.302675813709077e-05, |
|
"loss": 2.0548, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 0.5042695591968613, |
|
"grad_norm": 0.2388111650943756, |
|
"learning_rate": 6.292378616593833e-05, |
|
"loss": 2.075, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 0.5048465266558966, |
|
"grad_norm": 0.2684873044490814, |
|
"learning_rate": 6.282075542340105e-05, |
|
"loss": 2.0677, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.5054234941149319, |
|
"grad_norm": 0.2846076488494873, |
|
"learning_rate": 6.271766637801499e-05, |
|
"loss": 2.0608, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 0.5060004615739673, |
|
"grad_norm": 0.24689878523349762, |
|
"learning_rate": 6.261451949858127e-05, |
|
"loss": 2.0522, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 0.5065774290330025, |
|
"grad_norm": 0.4451148808002472, |
|
"learning_rate": 6.251131525416408e-05, |
|
"loss": 2.0595, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 0.5071543964920379, |
|
"grad_norm": 0.4091099500656128, |
|
"learning_rate": 6.240805411408844e-05, |
|
"loss": 2.0484, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 0.5077313639510732, |
|
"grad_norm": 0.24189534783363342, |
|
"learning_rate": 6.230473654793812e-05, |
|
"loss": 2.0602, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.5083083314101084, |
|
"grad_norm": 0.2729124426841736, |
|
"learning_rate": 6.220136302555349e-05, |
|
"loss": 2.0724, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 0.5088852988691438, |
|
"grad_norm": 0.29464176297187805, |
|
"learning_rate": 6.209793401702938e-05, |
|
"loss": 2.0466, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 0.509462266328179, |
|
"grad_norm": 0.36005499958992004, |
|
"learning_rate": 6.199444999271295e-05, |
|
"loss": 2.0682, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 0.5100392337872144, |
|
"grad_norm": 0.33139434456825256, |
|
"learning_rate": 6.189091142320154e-05, |
|
"loss": 2.0666, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 0.5106162012462497, |
|
"grad_norm": 0.23931476473808289, |
|
"learning_rate": 6.178731877934053e-05, |
|
"loss": 2.065, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.511193168705285, |
|
"grad_norm": 0.4048471748828888, |
|
"learning_rate": 6.168367253222121e-05, |
|
"loss": 2.0681, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 0.5117701361643203, |
|
"grad_norm": 0.32574188709259033, |
|
"learning_rate": 6.157997315317866e-05, |
|
"loss": 2.0524, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 0.5123471036233557, |
|
"grad_norm": 0.2571089267730713, |
|
"learning_rate": 6.147622111378953e-05, |
|
"loss": 2.0675, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 0.5129240710823909, |
|
"grad_norm": 0.30575376749038696, |
|
"learning_rate": 6.137241688586999e-05, |
|
"loss": 2.0588, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 0.5135010385414263, |
|
"grad_norm": 0.29877418279647827, |
|
"learning_rate": 6.12685609414735e-05, |
|
"loss": 2.0551, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.5140780060004616, |
|
"grad_norm": 0.2831680476665497, |
|
"learning_rate": 6.116465375288874e-05, |
|
"loss": 2.0544, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 0.5146549734594968, |
|
"grad_norm": 0.24778659641742706, |
|
"learning_rate": 6.106069579263739e-05, |
|
"loss": 2.0718, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 0.5152319409185322, |
|
"grad_norm": 0.2545316517353058, |
|
"learning_rate": 6.095668753347203e-05, |
|
"loss": 2.0755, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 0.5158089083775675, |
|
"grad_norm": 0.2952696979045868, |
|
"learning_rate": 6.085262944837401e-05, |
|
"loss": 2.0659, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 0.5163858758366028, |
|
"grad_norm": 0.3033768832683563, |
|
"learning_rate": 6.0748522010551215e-05, |
|
"loss": 2.0602, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.5169628432956381, |
|
"grad_norm": 0.28780362010002136, |
|
"learning_rate": 6.0644365693435966e-05, |
|
"loss": 2.0636, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 0.5175398107546735, |
|
"grad_norm": 0.3159029483795166, |
|
"learning_rate": 6.05401609706829e-05, |
|
"loss": 2.0676, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 0.5181167782137087, |
|
"grad_norm": 0.27421289682388306, |
|
"learning_rate": 6.043590831616677e-05, |
|
"loss": 2.0687, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 0.5186937456727441, |
|
"grad_norm": 0.27115151286125183, |
|
"learning_rate": 6.033160820398025e-05, |
|
"loss": 2.0566, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 0.5192707131317794, |
|
"grad_norm": 0.2915196120738983, |
|
"learning_rate": 6.022726110843192e-05, |
|
"loss": 2.0522, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.5198476805908147, |
|
"grad_norm": 0.2636251151561737, |
|
"learning_rate": 6.0122867504043946e-05, |
|
"loss": 2.0512, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 0.52042464804985, |
|
"grad_norm": 0.23821695148944855, |
|
"learning_rate": 6.001842786555002e-05, |
|
"loss": 2.0418, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 0.5210016155088854, |
|
"grad_norm": 0.3464409410953522, |
|
"learning_rate": 5.991394266789319e-05, |
|
"loss": 2.0717, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 0.5215785829679206, |
|
"grad_norm": 0.315859854221344, |
|
"learning_rate": 5.9809412386223615e-05, |
|
"loss": 2.0537, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 0.5221555504269559, |
|
"grad_norm": 0.32564985752105713, |
|
"learning_rate": 5.9704837495896604e-05, |
|
"loss": 2.0756, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.5227325178859912, |
|
"grad_norm": 0.32612520456314087, |
|
"learning_rate": 5.9600218472470196e-05, |
|
"loss": 2.0571, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 0.5233094853450265, |
|
"grad_norm": 0.25429829955101013, |
|
"learning_rate": 5.9495555791703214e-05, |
|
"loss": 2.0685, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 0.5238864528040619, |
|
"grad_norm": 0.2449975311756134, |
|
"learning_rate": 5.939084992955297e-05, |
|
"loss": 2.0534, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 0.5244634202630971, |
|
"grad_norm": 0.4133405089378357, |
|
"learning_rate": 5.928610136217317e-05, |
|
"loss": 2.0582, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 0.5250403877221325, |
|
"grad_norm": 0.3849872350692749, |
|
"learning_rate": 5.918131056591169e-05, |
|
"loss": 2.0663, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.5256173551811678, |
|
"grad_norm": 0.41498827934265137, |
|
"learning_rate": 5.907647801730848e-05, |
|
"loss": 2.0535, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 0.526194322640203, |
|
"grad_norm": 0.34519997239112854, |
|
"learning_rate": 5.8971604193093324e-05, |
|
"loss": 2.0579, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 0.5267712900992384, |
|
"grad_norm": 0.2998815178871155, |
|
"learning_rate": 5.8866689570183766e-05, |
|
"loss": 2.068, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 0.5273482575582737, |
|
"grad_norm": 0.24292801320552826, |
|
"learning_rate": 5.876173462568282e-05, |
|
"loss": 2.0493, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 0.527925225017309, |
|
"grad_norm": 0.29909130930900574, |
|
"learning_rate": 5.86567398368769e-05, |
|
"loss": 2.0573, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.5285021924763443, |
|
"grad_norm": 0.256276935338974, |
|
"learning_rate": 5.8551705681233604e-05, |
|
"loss": 2.0545, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 0.5290791599353797, |
|
"grad_norm": 0.26847782731056213, |
|
"learning_rate": 5.8446632636399534e-05, |
|
"loss": 2.0723, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 0.5296561273944149, |
|
"grad_norm": 0.2674199342727661, |
|
"learning_rate": 5.8341521180198177e-05, |
|
"loss": 2.067, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 0.5302330948534503, |
|
"grad_norm": 0.26743456721305847, |
|
"learning_rate": 5.82363717906277e-05, |
|
"loss": 2.0697, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 0.5308100623124856, |
|
"grad_norm": 0.302480012178421, |
|
"learning_rate": 5.81311849458587e-05, |
|
"loss": 2.0561, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.5313870297715209, |
|
"grad_norm": 0.30073806643486023, |
|
"learning_rate": 5.802596112423221e-05, |
|
"loss": 2.0624, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 0.5319639972305562, |
|
"grad_norm": 0.38998448848724365, |
|
"learning_rate": 5.7920700804257346e-05, |
|
"loss": 2.0574, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 0.5325409646895916, |
|
"grad_norm": 0.4289480745792389, |
|
"learning_rate": 5.78154044646092e-05, |
|
"loss": 2.0635, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 0.5331179321486268, |
|
"grad_norm": 0.2888721823692322, |
|
"learning_rate": 5.771007258412669e-05, |
|
"loss": 2.0494, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 0.5336948996076621, |
|
"grad_norm": 0.30417218804359436, |
|
"learning_rate": 5.760470564181038e-05, |
|
"loss": 2.0512, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.5342718670666975, |
|
"grad_norm": 0.2877718508243561, |
|
"learning_rate": 5.749930411682022e-05, |
|
"loss": 2.0628, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 0.5348488345257327, |
|
"grad_norm": 0.24942269921302795, |
|
"learning_rate": 5.739386848847346e-05, |
|
"loss": 2.0603, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 0.5354258019847681, |
|
"grad_norm": 0.2586665749549866, |
|
"learning_rate": 5.728839923624243e-05, |
|
"loss": 2.0637, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 0.5360027694438033, |
|
"grad_norm": 0.2691705822944641, |
|
"learning_rate": 5.7182896839752374e-05, |
|
"loss": 2.0582, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 0.5365797369028387, |
|
"grad_norm": 0.24790114164352417, |
|
"learning_rate": 5.7077361778779246e-05, |
|
"loss": 2.0675, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.537156704361874, |
|
"grad_norm": 0.2401387095451355, |
|
"learning_rate": 5.697179453324758e-05, |
|
"loss": 2.0668, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 0.5377336718209093, |
|
"grad_norm": 0.24574357271194458, |
|
"learning_rate": 5.686619558322821e-05, |
|
"loss": 2.0511, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 0.5383106392799446, |
|
"grad_norm": 0.3961896002292633, |
|
"learning_rate": 5.676056540893618e-05, |
|
"loss": 2.0449, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 0.53888760673898, |
|
"grad_norm": 0.35582342743873596, |
|
"learning_rate": 5.665490449072854e-05, |
|
"loss": 2.0737, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 0.5394645741980152, |
|
"grad_norm": 0.2613429129123688, |
|
"learning_rate": 5.6549213309102136e-05, |
|
"loss": 2.0627, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.5400415416570505, |
|
"grad_norm": 0.24746835231781006, |
|
"learning_rate": 5.6443492344691426e-05, |
|
"loss": 2.0437, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 0.5406185091160859, |
|
"grad_norm": 0.2716011106967926, |
|
"learning_rate": 5.633774207826632e-05, |
|
"loss": 2.0565, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 0.5411954765751211, |
|
"grad_norm": 0.2716337740421295, |
|
"learning_rate": 5.623196299072999e-05, |
|
"loss": 2.0446, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 0.5417724440341565, |
|
"grad_norm": 0.26952430605888367, |
|
"learning_rate": 5.612615556311665e-05, |
|
"loss": 2.0628, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 0.5423494114931918, |
|
"grad_norm": 0.23789243400096893, |
|
"learning_rate": 5.602032027658941e-05, |
|
"loss": 2.0583, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.5429263789522271, |
|
"grad_norm": 0.3613603413105011, |
|
"learning_rate": 5.591445761243806e-05, |
|
"loss": 2.0637, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 0.5435033464112624, |
|
"grad_norm": 0.5396514534950256, |
|
"learning_rate": 5.580856805207687e-05, |
|
"loss": 2.0623, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 0.5440803138702978, |
|
"grad_norm": 0.2915268838405609, |
|
"learning_rate": 5.5702652077042485e-05, |
|
"loss": 2.0632, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 0.544657281329333, |
|
"grad_norm": 0.22387048602104187, |
|
"learning_rate": 5.5596710168991594e-05, |
|
"loss": 2.0496, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 0.5452342487883683, |
|
"grad_norm": 0.27139759063720703, |
|
"learning_rate": 5.549074280969887e-05, |
|
"loss": 2.0523, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.5458112162474037, |
|
"grad_norm": 0.2295805811882019, |
|
"learning_rate": 5.538475048105471e-05, |
|
"loss": 2.0526, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 0.5463881837064389, |
|
"grad_norm": 0.2620329260826111, |
|
"learning_rate": 5.527873366506308e-05, |
|
"loss": 2.039, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 0.5469651511654743, |
|
"grad_norm": 0.25486627221107483, |
|
"learning_rate": 5.517269284383923e-05, |
|
"loss": 2.0497, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 0.5475421186245096, |
|
"grad_norm": 0.30340516567230225, |
|
"learning_rate": 5.506662849960769e-05, |
|
"loss": 2.0385, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 0.5481190860835449, |
|
"grad_norm": 0.2664933204650879, |
|
"learning_rate": 5.496054111469987e-05, |
|
"loss": 2.046, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.5486960535425802, |
|
"grad_norm": 0.33765774965286255, |
|
"learning_rate": 5.485443117155198e-05, |
|
"loss": 2.0455, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 0.5492730210016155, |
|
"grad_norm": 0.26019567251205444, |
|
"learning_rate": 5.474829915270283e-05, |
|
"loss": 2.0551, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 0.5498499884606508, |
|
"grad_norm": 0.3324459195137024, |
|
"learning_rate": 5.464214554079162e-05, |
|
"loss": 2.0465, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 0.5504269559196862, |
|
"grad_norm": 0.23958685994148254, |
|
"learning_rate": 5.453597081855574e-05, |
|
"loss": 2.0526, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 0.5510039233787214, |
|
"grad_norm": 0.24503254890441895, |
|
"learning_rate": 5.442977546882856e-05, |
|
"loss": 2.054, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.5515808908377567, |
|
"grad_norm": 0.29835647344589233, |
|
"learning_rate": 5.432355997453729e-05, |
|
"loss": 2.0557, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 0.5521578582967921, |
|
"grad_norm": 0.2821817398071289, |
|
"learning_rate": 5.421732481870073e-05, |
|
"loss": 2.0621, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 0.5527348257558273, |
|
"grad_norm": 0.2836339473724365, |
|
"learning_rate": 5.411107048442708e-05, |
|
"loss": 2.0618, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 0.5533117932148627, |
|
"grad_norm": 0.23530443012714386, |
|
"learning_rate": 5.4004797454911784e-05, |
|
"loss": 2.0579, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 0.553888760673898, |
|
"grad_norm": 0.27028971910476685, |
|
"learning_rate": 5.389850621343525e-05, |
|
"loss": 2.0593, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.5544657281329333, |
|
"grad_norm": 0.26975545287132263, |
|
"learning_rate": 5.379219724336077e-05, |
|
"loss": 2.049, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 0.5550426955919686, |
|
"grad_norm": 0.2712181806564331, |
|
"learning_rate": 5.368587102813224e-05, |
|
"loss": 2.0533, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 0.555619663051004, |
|
"grad_norm": 0.2469034343957901, |
|
"learning_rate": 5.357952805127194e-05, |
|
"loss": 2.0419, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 0.5561966305100392, |
|
"grad_norm": 0.3148432970046997, |
|
"learning_rate": 5.347316879637844e-05, |
|
"loss": 2.0518, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 0.5567735979690746, |
|
"grad_norm": 0.2899792492389679, |
|
"learning_rate": 5.3366793747124245e-05, |
|
"loss": 2.0508, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.5573505654281099, |
|
"grad_norm": 0.2373170405626297, |
|
"learning_rate": 5.3260403387253764e-05, |
|
"loss": 2.0638, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 0.5579275328871451, |
|
"grad_norm": 0.29644837975502014, |
|
"learning_rate": 5.315399820058102e-05, |
|
"loss": 2.0491, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 0.5585045003461805, |
|
"grad_norm": 0.3238583505153656, |
|
"learning_rate": 5.304757867098742e-05, |
|
"loss": 2.0491, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 0.5590814678052158, |
|
"grad_norm": 0.328906774520874, |
|
"learning_rate": 5.294114528241963e-05, |
|
"loss": 2.0633, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 0.5596584352642511, |
|
"grad_norm": 0.3107961118221283, |
|
"learning_rate": 5.283469851888735e-05, |
|
"loss": 2.0385, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.5602354027232864, |
|
"grad_norm": 0.3088037073612213, |
|
"learning_rate": 5.272823886446107e-05, |
|
"loss": 2.063, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 0.5608123701823217, |
|
"grad_norm": 0.2720682919025421, |
|
"learning_rate": 5.262176680326991e-05, |
|
"loss": 2.0408, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 0.561389337641357, |
|
"grad_norm": 0.2519686818122864, |
|
"learning_rate": 5.251528281949942e-05, |
|
"loss": 2.0475, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 0.5619663051003924, |
|
"grad_norm": 0.2880825102329254, |
|
"learning_rate": 5.240878739738938e-05, |
|
"loss": 2.0548, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 0.5625432725594276, |
|
"grad_norm": 0.3425326645374298, |
|
"learning_rate": 5.230228102123157e-05, |
|
"loss": 2.0552, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.563120240018463, |
|
"grad_norm": 0.23194807767868042, |
|
"learning_rate": 5.219576417536757e-05, |
|
"loss": 2.0372, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 0.5636972074774983, |
|
"grad_norm": 0.23607051372528076, |
|
"learning_rate": 5.2089237344186604e-05, |
|
"loss": 2.0448, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 0.5642741749365335, |
|
"grad_norm": 0.31973376870155334, |
|
"learning_rate": 5.1982701012123293e-05, |
|
"loss": 2.043, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 0.5648511423955689, |
|
"grad_norm": 0.3390558063983917, |
|
"learning_rate": 5.1876155663655423e-05, |
|
"loss": 2.05, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 0.5654281098546042, |
|
"grad_norm": 0.24147625267505646, |
|
"learning_rate": 5.176960178330186e-05, |
|
"loss": 2.0488, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.5660050773136395, |
|
"grad_norm": 0.226022869348526, |
|
"learning_rate": 5.166303985562021e-05, |
|
"loss": 2.0434, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 0.5665820447726748, |
|
"grad_norm": 0.2638387680053711, |
|
"learning_rate": 5.155647036520469e-05, |
|
"loss": 2.0366, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 0.5671590122317102, |
|
"grad_norm": 0.31156018376350403, |
|
"learning_rate": 5.144989379668391e-05, |
|
"loss": 2.0387, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 0.5677359796907454, |
|
"grad_norm": 0.23942683637142181, |
|
"learning_rate": 5.1343310634718665e-05, |
|
"loss": 2.0422, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 0.5683129471497808, |
|
"grad_norm": 0.29752033948898315, |
|
"learning_rate": 5.123672136399975e-05, |
|
"loss": 2.0551, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.5688899146088161, |
|
"grad_norm": 0.26006224751472473, |
|
"learning_rate": 5.113012646924572e-05, |
|
"loss": 2.0375, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 0.5694668820678513, |
|
"grad_norm": 0.31031838059425354, |
|
"learning_rate": 5.102352643520072e-05, |
|
"loss": 2.0491, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 0.5700438495268867, |
|
"grad_norm": 0.2978913187980652, |
|
"learning_rate": 5.091692174663224e-05, |
|
"loss": 2.0709, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 0.570620816985922, |
|
"grad_norm": 0.29858845472335815, |
|
"learning_rate": 5.0810312888328985e-05, |
|
"loss": 2.0553, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 0.5711977844449573, |
|
"grad_norm": 0.2596759498119354, |
|
"learning_rate": 5.070370034509856e-05, |
|
"loss": 2.0661, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.5717747519039926, |
|
"grad_norm": 0.2523353695869446, |
|
"learning_rate": 5.0597084601765376e-05, |
|
"loss": 2.0336, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 0.572351719363028, |
|
"grad_norm": 0.2640801668167114, |
|
"learning_rate": 5.049046614316837e-05, |
|
"loss": 2.0434, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 0.5729286868220632, |
|
"grad_norm": 0.3435620069503784, |
|
"learning_rate": 5.038384545415885e-05, |
|
"loss": 2.0458, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 0.5735056542810986, |
|
"grad_norm": 0.2811577022075653, |
|
"learning_rate": 5.0277223019598265e-05, |
|
"loss": 2.0498, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 0.5740826217401338, |
|
"grad_norm": 0.33941999077796936, |
|
"learning_rate": 5.0170599324355974e-05, |
|
"loss": 2.0369, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 0.5746595891991692, |
|
"grad_norm": 0.2926217019557953, |
|
"learning_rate": 5.0063974853307086e-05, |
|
"loss": 2.0558, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 0.5752365566582045, |
|
"grad_norm": 0.2736097276210785, |
|
"learning_rate": 4.9957350091330217e-05, |
|
"loss": 2.0393, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 0.5758135241172397, |
|
"grad_norm": 0.23603315651416779, |
|
"learning_rate": 4.985072552330537e-05, |
|
"loss": 2.0421, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 0.5763904915762751, |
|
"grad_norm": 0.2496812641620636, |
|
"learning_rate": 4.97441016341116e-05, |
|
"loss": 2.0536, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 0.5769674590353104, |
|
"grad_norm": 0.2511531114578247, |
|
"learning_rate": 4.963747890862491e-05, |
|
"loss": 2.0478, |
|
"step": 10000 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 17332, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.182962763871027e+19, |
|
"train_batch_size": 96, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|