{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.998681608437706,
  "eval_steps": 500,
  "global_step": 7583,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0065919578114700065,
      "grad_norm": 11.249381065368652,
      "learning_rate": 5.263157894736842e-06,
      "loss": 1.324,
      "step": 10
    },
    {
      "epoch": 0.013183915622940013,
      "grad_norm": 8.759961128234863,
      "learning_rate": 1.0526315789473684e-05,
      "loss": 1.1761,
      "step": 20
    },
    {
      "epoch": 0.01977587343441002,
      "grad_norm": 2.233778953552246,
      "learning_rate": 1.5789473684210526e-05,
      "loss": 0.6049,
      "step": 30
    },
    {
      "epoch": 0.026367831245880026,
      "grad_norm": 3.505425453186035,
      "learning_rate": 2.105263157894737e-05,
      "loss": 0.3419,
      "step": 40
    },
    {
      "epoch": 0.03295978905735003,
      "grad_norm": 1.7327282428741455,
      "learning_rate": 2.6315789473684212e-05,
      "loss": 0.2418,
      "step": 50
    },
    {
      "epoch": 0.03955174686882004,
      "grad_norm": 1.4897282123565674,
      "learning_rate": 3.157894736842105e-05,
      "loss": 0.2017,
      "step": 60
    },
    {
      "epoch": 0.04614370468029005,
      "grad_norm": 0.8921314477920532,
      "learning_rate": 3.6842105263157895e-05,
      "loss": 0.1668,
      "step": 70
    },
    {
      "epoch": 0.05273566249176005,
      "grad_norm": 1.3826392889022827,
      "learning_rate": 4.210526315789474e-05,
      "loss": 0.1532,
      "step": 80
    },
    {
      "epoch": 0.05932762030323006,
      "grad_norm": 1.489062786102295,
      "learning_rate": 4.736842105263158e-05,
      "loss": 0.1084,
      "step": 90
    },
    {
      "epoch": 0.06591957811470006,
      "grad_norm": 1.280565619468689,
      "learning_rate": 5.2631578947368424e-05,
      "loss": 0.1128,
      "step": 100
    },
    {
      "epoch": 0.07251153592617007,
      "grad_norm": 1.2948462963104248,
      "learning_rate": 5.789473684210527e-05,
      "loss": 0.1044,
      "step": 110
    },
    {
      "epoch": 0.07910349373764008,
      "grad_norm": 1.5762895345687866,
      "learning_rate": 6.31578947368421e-05,
      "loss": 0.1034,
      "step": 120
    },
    {
      "epoch": 0.08569545154911008,
      "grad_norm": 1.0561785697937012,
      "learning_rate": 6.842105263157895e-05,
      "loss": 0.0798,
      "step": 130
    },
    {
      "epoch": 0.0922874093605801,
      "grad_norm": 0.9102309346199036,
      "learning_rate": 7.368421052631579e-05,
      "loss": 0.0752,
      "step": 140
    },
    {
      "epoch": 0.09887936717205009,
      "grad_norm": 1.4243663549423218,
      "learning_rate": 7.894736842105263e-05,
      "loss": 0.0863,
      "step": 150
    },
    {
      "epoch": 0.1054713249835201,
      "grad_norm": 0.7150789499282837,
      "learning_rate": 8.421052631578948e-05,
      "loss": 0.0778,
      "step": 160
    },
    {
      "epoch": 0.11206328279499012,
      "grad_norm": 0.9231832027435303,
      "learning_rate": 8.947368421052632e-05,
      "loss": 0.0796,
      "step": 170
    },
    {
      "epoch": 0.11865524060646011,
      "grad_norm": 0.5305670499801636,
      "learning_rate": 9.473684210526316e-05,
      "loss": 0.0733,
      "step": 180
    },
    {
      "epoch": 0.12524719841793014,
      "grad_norm": 1.0431275367736816,
      "learning_rate": 0.0001,
      "loss": 0.0713,
      "step": 190
    },
    {
      "epoch": 0.13183915622940012,
      "grad_norm": 1.0667047500610352,
      "learning_rate": 0.00010526315789473685,
      "loss": 0.0738,
      "step": 200
    },
    {
      "epoch": 0.13843111404087013,
      "grad_norm": 0.9431530833244324,
      "learning_rate": 0.0001105263157894737,
      "loss": 0.0695,
      "step": 210
    },
    {
      "epoch": 0.14502307185234015,
      "grad_norm": 1.231911063194275,
      "learning_rate": 0.00011578947368421053,
      "loss": 0.0707,
      "step": 220
    },
    {
      "epoch": 0.15161502966381016,
      "grad_norm": 0.5772905945777893,
      "learning_rate": 0.00012105263157894738,
      "loss": 0.0642,
      "step": 230
    },
    {
      "epoch": 0.15820698747528017,
      "grad_norm": 0.6241514086723328,
      "learning_rate": 0.0001263157894736842,
      "loss": 0.0621,
      "step": 240
    },
    {
      "epoch": 0.16479894528675015,
      "grad_norm": 0.7449037432670593,
      "learning_rate": 0.00013157894736842108,
      "loss": 0.0639,
      "step": 250
    },
    {
      "epoch": 0.17139090309822017,
      "grad_norm": 0.9040747880935669,
      "learning_rate": 0.0001368421052631579,
      "loss": 0.0595,
      "step": 260
    },
    {
      "epoch": 0.17798286090969018,
      "grad_norm": 0.6246598958969116,
      "learning_rate": 0.00014210526315789474,
      "loss": 0.0612,
      "step": 270
    },
    {
      "epoch": 0.1845748187211602,
      "grad_norm": 0.6300843358039856,
      "learning_rate": 0.00014736842105263158,
      "loss": 0.0574,
      "step": 280
    },
    {
      "epoch": 0.1911667765326302,
      "grad_norm": 0.7051455974578857,
      "learning_rate": 0.00015263157894736845,
      "loss": 0.0489,
      "step": 290
    },
    {
      "epoch": 0.19775873434410018,
      "grad_norm": 0.8903814554214478,
      "learning_rate": 0.00015789473684210527,
      "loss": 0.0588,
      "step": 300
    },
    {
      "epoch": 0.2043506921555702,
      "grad_norm": 0.8815051317214966,
      "learning_rate": 0.0001631578947368421,
      "loss": 0.0605,
      "step": 310
    },
    {
      "epoch": 0.2109426499670402,
      "grad_norm": 0.7266796231269836,
      "learning_rate": 0.00016842105263157895,
      "loss": 0.0555,
      "step": 320
    },
    {
      "epoch": 0.21753460777851022,
      "grad_norm": 1.033163070678711,
      "learning_rate": 0.0001736842105263158,
      "loss": 0.056,
      "step": 330
    },
    {
      "epoch": 0.22412656558998023,
      "grad_norm": 1.339528203010559,
      "learning_rate": 0.00017894736842105264,
      "loss": 0.0513,
      "step": 340
    },
    {
      "epoch": 0.23071852340145024,
      "grad_norm": 1.1713142395019531,
      "learning_rate": 0.00018421052631578948,
      "loss": 0.0604,
      "step": 350
    },
    {
      "epoch": 0.23731048121292023,
      "grad_norm": 0.7305978536605835,
      "learning_rate": 0.00018947368421052632,
      "loss": 0.061,
      "step": 360
    },
    {
      "epoch": 0.24390243902439024,
      "grad_norm": 0.6867638826370239,
      "learning_rate": 0.00019473684210526317,
      "loss": 0.0446,
      "step": 370
    },
    {
      "epoch": 0.2504943968358603,
      "grad_norm": 0.480622798204422,
      "learning_rate": 0.0002,
      "loss": 0.0507,
      "step": 380
    },
    {
      "epoch": 0.25708635464733026,
      "grad_norm": 0.6892393827438354,
      "learning_rate": 0.00019999904886484996,
      "loss": 0.0562,
      "step": 390
    },
    {
      "epoch": 0.26367831245880025,
      "grad_norm": 0.8014799952507019,
      "learning_rate": 0.00019999619547749294,
      "loss": 0.0407,
      "step": 400
    },
    {
      "epoch": 0.2702702702702703,
      "grad_norm": 0.8931164741516113,
      "learning_rate": 0.0001999914398922081,
      "loss": 0.0488,
      "step": 410
    },
    {
      "epoch": 0.27686222808174027,
      "grad_norm": 0.5557290315628052,
      "learning_rate": 0.00019998478219945958,
      "loss": 0.0533,
      "step": 420
    },
    {
      "epoch": 0.2834541858932103,
      "grad_norm": 0.9810464978218079,
      "learning_rate": 0.00019997622252589464,
      "loss": 0.052,
      "step": 430
    },
    {
      "epoch": 0.2900461437046803,
      "grad_norm": 0.6797704696655273,
      "learning_rate": 0.00019996576103434137,
      "loss": 0.0514,
      "step": 440
    },
    {
      "epoch": 0.2966381015161503,
      "grad_norm": 1.141650915145874,
      "learning_rate": 0.0001999533979238057,
      "loss": 0.0489,
      "step": 450
    },
    {
      "epoch": 0.3032300593276203,
      "grad_norm": 0.6689559817314148,
      "learning_rate": 0.00019993913342946734,
      "loss": 0.0441,
      "step": 460
    },
    {
      "epoch": 0.3098220171390903,
      "grad_norm": 0.524917721748352,
      "learning_rate": 0.0001999229678226756,
      "loss": 0.0457,
      "step": 470
    },
    {
      "epoch": 0.31641397495056034,
      "grad_norm": 0.7408258318901062,
      "learning_rate": 0.00019990490141094392,
      "loss": 0.0428,
      "step": 480
    },
    {
      "epoch": 0.3230059327620303,
      "grad_norm": 0.5927634835243225,
      "learning_rate": 0.0001998849345379444,
      "loss": 0.0431,
      "step": 490
    },
    {
      "epoch": 0.3295978905735003,
      "grad_norm": 0.4574936628341675,
      "learning_rate": 0.00019986306758350083,
      "loss": 0.038,
      "step": 500
    },
    {
      "epoch": 0.33618984838497035,
      "grad_norm": 0.6031479835510254,
      "learning_rate": 0.00019983930096358188,
      "loss": 0.0442,
      "step": 510
    },
    {
      "epoch": 0.34278180619644033,
      "grad_norm": 0.4019775688648224,
      "learning_rate": 0.00019981363513029283,
      "loss": 0.0336,
      "step": 520
    },
    {
      "epoch": 0.34937376400791037,
      "grad_norm": 0.6691102981567383,
      "learning_rate": 0.00019978607057186725,
      "loss": 0.0387,
      "step": 530
    },
    {
      "epoch": 0.35596572181938035,
      "grad_norm": 0.39324843883514404,
      "learning_rate": 0.00019975660781265753,
      "loss": 0.0449,
      "step": 540
    },
    {
      "epoch": 0.36255767963085034,
      "grad_norm": 0.5069633722305298,
      "learning_rate": 0.00019972524741312497,
      "loss": 0.0319,
      "step": 550
    },
    {
      "epoch": 0.3691496374423204,
      "grad_norm": 0.5699636936187744,
      "learning_rate": 0.00019969198996982917,
      "loss": 0.0402,
      "step": 560
    },
    {
      "epoch": 0.37574159525379036,
      "grad_norm": 1.0686895847320557,
      "learning_rate": 0.00019965683611541655,
      "loss": 0.0542,
      "step": 570
    },
    {
      "epoch": 0.3823335530652604,
      "grad_norm": 0.4853604733943939,
      "learning_rate": 0.00019961978651860854,
      "loss": 0.0476,
      "step": 580
    },
    {
      "epoch": 0.3889255108767304,
      "grad_norm": 0.8250619173049927,
      "learning_rate": 0.0001995808418841885,
      "loss": 0.034,
      "step": 590
    },
    {
      "epoch": 0.39551746868820037,
      "grad_norm": 0.6085853576660156,
      "learning_rate": 0.00019954000295298871,
      "loss": 0.0389,
      "step": 600
    },
    {
      "epoch": 0.4021094264996704,
      "grad_norm": 3.688549041748047,
      "learning_rate": 0.000199497270501876,
      "loss": 0.0511,
      "step": 610
    },
    {
      "epoch": 0.4087013843111404,
      "grad_norm": 1.5635132789611816,
      "learning_rate": 0.00019945264534373714,
      "loss": 0.1116,
      "step": 620
    },
    {
      "epoch": 0.41529334212261043,
      "grad_norm": 0.7884135246276855,
      "learning_rate": 0.00019940612832746322,
      "loss": 0.0737,
      "step": 630
    },
    {
      "epoch": 0.4218852999340804,
      "grad_norm": 0.9017935395240784,
      "learning_rate": 0.0001993577203379336,
      "loss": 0.0789,
      "step": 640
    },
    {
      "epoch": 0.42847725774555045,
      "grad_norm": 0.8649272918701172,
      "learning_rate": 0.00019930742229599914,
      "loss": 0.0728,
      "step": 650
    },
    {
      "epoch": 0.43506921555702044,
      "grad_norm": 0.772191047668457,
      "learning_rate": 0.00019925523515846455,
      "loss": 0.0697,
      "step": 660
    },
    {
      "epoch": 0.4416611733684904,
      "grad_norm": 0.5265079140663147,
      "learning_rate": 0.00019920115991807022,
      "loss": 0.0622,
      "step": 670
    },
    {
      "epoch": 0.44825313117996046,
      "grad_norm": 0.8318515419960022,
      "learning_rate": 0.0001991451976034734,
      "loss": 0.0786,
      "step": 680
    },
    {
      "epoch": 0.45484508899143045,
      "grad_norm": 0.7197186946868896,
      "learning_rate": 0.0001990873492792286,
      "loss": 0.059,
      "step": 690
    },
    {
      "epoch": 0.4614370468029005,
      "grad_norm": 0.9418641328811646,
      "learning_rate": 0.00019902761604576725,
      "loss": 0.078,
      "step": 700
    },
    {
      "epoch": 0.46802900461437047,
      "grad_norm": 0.7985256314277649,
      "learning_rate": 0.00019896599903937697,
      "loss": 0.0834,
      "step": 710
    },
    {
      "epoch": 0.47462096242584045,
      "grad_norm": 0.6049144268035889,
      "learning_rate": 0.00019890249943217976,
      "loss": 0.0656,
      "step": 720
    },
    {
      "epoch": 0.4812129202373105,
      "grad_norm": 0.6395105719566345,
      "learning_rate": 0.0001988371184321098,
      "loss": 0.0764,
      "step": 730
    },
    {
      "epoch": 0.4878048780487805,
      "grad_norm": 0.58722984790802,
      "learning_rate": 0.00019876985728289038,
      "loss": 0.0588,
      "step": 740
    },
    {
      "epoch": 0.4943968358602505,
      "grad_norm": 0.4679464101791382,
      "learning_rate": 0.00019870071726401043,
      "loss": 0.0638,
      "step": 750
    },
    {
      "epoch": 0.5009887936717206,
      "grad_norm": 0.509775698184967,
      "learning_rate": 0.00019862969969069996,
      "loss": 0.0602,
      "step": 760
    },
    {
      "epoch": 0.5075807514831905,
      "grad_norm": 0.8126184344291687,
      "learning_rate": 0.00019855680591390518,
      "loss": 0.069,
      "step": 770
    },
    {
      "epoch": 0.5141727092946605,
      "grad_norm": 0.7676377892494202,
      "learning_rate": 0.00019848203732026275,
      "loss": 0.0704,
      "step": 780
    },
    {
      "epoch": 0.5207646671061306,
      "grad_norm": 1.0301965475082397,
      "learning_rate": 0.00019840539533207344,
      "loss": 0.0666,
      "step": 790
    },
    {
      "epoch": 0.5273566249176005,
      "grad_norm": 0.6810826063156128,
      "learning_rate": 0.000198326881407275,
      "loss": 0.0698,
      "step": 800
    },
    {
      "epoch": 0.5339485827290705,
      "grad_norm": 0.4939572513103485,
      "learning_rate": 0.00019824649703941455,
      "loss": 0.0548,
      "step": 810
    },
    {
      "epoch": 0.5405405405405406,
      "grad_norm": 0.6614457964897156,
      "learning_rate": 0.00019816424375762001,
      "loss": 0.0748,
      "step": 820
    },
    {
      "epoch": 0.5471324983520105,
      "grad_norm": 0.7715848088264465,
      "learning_rate": 0.00019808012312657114,
      "loss": 0.0653,
      "step": 830
    },
    {
      "epoch": 0.5537244561634805,
      "grad_norm": 0.5254570245742798,
      "learning_rate": 0.00019799413674646973,
      "loss": 0.0537,
      "step": 840
    },
    {
      "epoch": 0.5603164139749506,
      "grad_norm": 0.7626491785049438,
      "learning_rate": 0.0001979062862530091,
      "loss": 0.0599,
      "step": 850
    },
    {
      "epoch": 0.5669083717864206,
      "grad_norm": 0.6767850518226624,
      "learning_rate": 0.00019781657331734316,
      "loss": 0.0644,
      "step": 860
    },
    {
      "epoch": 0.5735003295978905,
      "grad_norm": 0.4016531705856323,
      "learning_rate": 0.0001977249996460544,
      "loss": 0.0543,
      "step": 870
    },
    {
      "epoch": 0.5800922874093606,
      "grad_norm": 1.0104889869689941,
      "learning_rate": 0.0001976315669811216,
      "loss": 0.0681,
      "step": 880
    },
    {
      "epoch": 0.5866842452208306,
      "grad_norm": 0.7674484252929688,
      "learning_rate": 0.00019753627709988658,
      "loss": 0.0562,
      "step": 890
    },
    {
      "epoch": 0.5932762030323006,
      "grad_norm": 1.2781016826629639,
      "learning_rate": 0.00019743913181502048,
      "loss": 0.0602,
      "step": 900
    },
    {
      "epoch": 0.5998681608437706,
      "grad_norm": 0.5540818572044373,
      "learning_rate": 0.00019734013297448914,
      "loss": 0.0631,
      "step": 910
    },
    {
      "epoch": 0.6064601186552406,
      "grad_norm": 0.7823266386985779,
      "learning_rate": 0.00019723928246151814,
      "loss": 0.0637,
      "step": 920
    },
    {
      "epoch": 0.6130520764667106,
      "grad_norm": 0.6756680607795715,
      "learning_rate": 0.00019713658219455685,
      "loss": 0.0684,
      "step": 930
    },
    {
      "epoch": 0.6196440342781806,
      "grad_norm": 0.8224459290504456,
      "learning_rate": 0.0001970320341272419,
      "loss": 0.0512,
      "step": 940
    },
    {
      "epoch": 0.6262359920896506,
      "grad_norm": 0.8429596424102783,
      "learning_rate": 0.00019692564024836016,
      "loss": 0.0516,
      "step": 950
    },
    {
      "epoch": 0.6328279499011207,
      "grad_norm": 0.7025866508483887,
      "learning_rate": 0.0001968174025818108,
      "loss": 0.0667,
      "step": 960
    },
    {
      "epoch": 0.6394199077125906,
      "grad_norm": 0.624162495136261,
      "learning_rate": 0.00019670732318656677,
      "loss": 0.0575,
      "step": 970
    },
    {
      "epoch": 0.6460118655240606,
      "grad_norm": 0.5887486338615417,
      "learning_rate": 0.00019659540415663571,
      "loss": 0.0488,
      "step": 980
    },
    {
      "epoch": 0.6526038233355307,
      "grad_norm": 0.45346468687057495,
      "learning_rate": 0.00019648164762102013,
      "loss": 0.0483,
      "step": 990
    },
    {
      "epoch": 0.6591957811470006,
      "grad_norm": 0.6038155555725098,
      "learning_rate": 0.0001963660557436768,
      "loss": 0.054,
      "step": 1000
    },
    {
      "epoch": 0.6657877389584707,
      "grad_norm": 0.5043258666992188,
      "learning_rate": 0.00019624863072347564,
      "loss": 0.0631,
      "step": 1010
    },
    {
      "epoch": 0.6723796967699407,
      "grad_norm": 0.6452742218971252,
      "learning_rate": 0.000196129374794158,
      "loss": 0.0551,
      "step": 1020
    },
    {
      "epoch": 0.6789716545814107,
      "grad_norm": 0.6438404321670532,
      "learning_rate": 0.0001960082902242939,
      "loss": 0.0501,
      "step": 1030
    },
    {
      "epoch": 0.6855636123928807,
      "grad_norm": 0.8768063187599182,
      "learning_rate": 0.00019588537931723927,
      "loss": 0.0516,
      "step": 1040
    },
    {
      "epoch": 0.6921555702043507,
      "grad_norm": 0.767848014831543,
      "learning_rate": 0.00019576064441109172,
      "loss": 0.0501,
      "step": 1050
    },
    {
      "epoch": 0.6987475280158207,
      "grad_norm": 0.6131387948989868,
      "learning_rate": 0.00019563408787864634,
      "loss": 0.0595,
      "step": 1060
    },
    {
      "epoch": 0.7053394858272907,
      "grad_norm": 0.4806978404521942,
      "learning_rate": 0.00019550571212735048,
      "loss": 0.0475,
      "step": 1070
    },
    {
      "epoch": 0.7119314436387607,
      "grad_norm": 0.4950248897075653,
      "learning_rate": 0.00019537551959925787,
      "loss": 0.048,
      "step": 1080
    },
    {
      "epoch": 0.7185234014502307,
      "grad_norm": 0.5537814497947693,
      "learning_rate": 0.0001952435127709824,
      "loss": 0.046,
      "step": 1090
    },
    {
      "epoch": 0.7251153592617007,
      "grad_norm": 0.4151875078678131,
      "learning_rate": 0.00019510969415365063,
      "loss": 0.0429,
      "step": 1100
    },
    {
      "epoch": 0.7317073170731707,
      "grad_norm": 0.42159780859947205,
      "learning_rate": 0.0001949740662928545,
      "loss": 0.0434,
      "step": 1110
    },
    {
      "epoch": 0.7382992748846408,
      "grad_norm": 0.454226016998291,
      "learning_rate": 0.00019483663176860248,
      "loss": 0.0421,
      "step": 1120
    },
    {
      "epoch": 0.7448912326961108,
      "grad_norm": 0.37481585144996643,
      "learning_rate": 0.00019469739319527064,
      "loss": 0.043,
      "step": 1130
    },
    {
      "epoch": 0.7514831905075807,
      "grad_norm": 0.6487095952033997,
      "learning_rate": 0.00019455635322155313,
      "loss": 0.0433,
      "step": 1140
    },
    {
      "epoch": 0.7580751483190508,
      "grad_norm": 0.44085580110549927,
      "learning_rate": 0.00019441351453041138,
      "loss": 0.0492,
      "step": 1150
    },
    {
      "epoch": 0.7646671061305208,
      "grad_norm": 0.49984055757522583,
      "learning_rate": 0.00019426887983902343,
      "loss": 0.0431,
      "step": 1160
    },
    {
      "epoch": 0.7712590639419907,
      "grad_norm": 0.5114363431930542,
      "learning_rate": 0.00019412245189873203,
      "loss": 0.0448,
      "step": 1170
    },
    {
      "epoch": 0.7778510217534608,
      "grad_norm": 0.5482351779937744,
      "learning_rate": 0.00019397423349499246,
      "loss": 0.0481,
      "step": 1180
    },
    {
      "epoch": 0.7844429795649308,
      "grad_norm": 0.7064313888549805,
      "learning_rate": 0.00019382422744731933,
      "loss": 0.0476,
      "step": 1190
    },
    {
      "epoch": 0.7910349373764007,
      "grad_norm": 0.5201088190078735,
      "learning_rate": 0.0001936724366092332,
      "loss": 0.0596,
      "step": 1200
    },
    {
      "epoch": 0.7976268951878708,
      "grad_norm": 0.794978678226471,
      "learning_rate": 0.000193518863868206,
      "loss": 0.0484,
      "step": 1210
    },
    {
      "epoch": 0.8042188529993408,
      "grad_norm": 0.5086749196052551,
      "learning_rate": 0.00019336351214560647,
      "loss": 0.0482,
      "step": 1220
    },
    {
      "epoch": 0.8108108108108109,
      "grad_norm": 0.5501623749732971,
      "learning_rate": 0.00019320638439664426,
      "loss": 0.0417,
      "step": 1230
    },
    {
      "epoch": 0.8174027686222808,
      "grad_norm": 0.4340960383415222,
      "learning_rate": 0.0001930474836103138,
      "loss": 0.0406,
      "step": 1240
    },
    {
      "epoch": 0.8239947264337508,
      "grad_norm": 0.5098422169685364,
      "learning_rate": 0.00019288681280933768,
      "loss": 0.0485,
      "step": 1250
    },
    {
      "epoch": 0.8305866842452209,
      "grad_norm": 0.4968768358230591,
      "learning_rate": 0.00019272437505010877,
      "loss": 0.0412,
      "step": 1260
    },
    {
      "epoch": 0.8371786420566908,
      "grad_norm": 0.46997663378715515,
      "learning_rate": 0.00019256017342263228,
      "loss": 0.0388,
      "step": 1270
    },
    {
      "epoch": 0.8437705998681608,
      "grad_norm": 0.5510318279266357,
      "learning_rate": 0.00019239421105046706,
      "loss": 0.056,
      "step": 1280
    },
    {
      "epoch": 0.8503625576796309,
      "grad_norm": 0.47607627511024475,
      "learning_rate": 0.000192226491090666,
      "loss": 0.0462,
      "step": 1290
    },
    {
      "epoch": 0.8569545154911009,
      "grad_norm": 0.4591579735279083,
      "learning_rate": 0.00019205701673371606,
      "loss": 0.0456,
      "step": 1300
    },
    {
      "epoch": 0.8635464733025708,
      "grad_norm": 0.45051664113998413,
      "learning_rate": 0.00019188579120347766,
      "loss": 0.0402,
      "step": 1310
    },
    {
      "epoch": 0.8701384311140409,
      "grad_norm": 0.3680923283100128,
      "learning_rate": 0.00019171281775712316,
      "loss": 0.0378,
      "step": 1320
    },
    {
      "epoch": 0.8767303889255109,
      "grad_norm": 0.4515272080898285,
      "learning_rate": 0.00019153809968507505,
      "loss": 0.0439,
      "step": 1330
    },
    {
      "epoch": 0.8833223467369808,
      "grad_norm": 0.5114394426345825,
      "learning_rate": 0.00019136164031094337,
      "loss": 0.0522,
      "step": 1340
    },
    {
      "epoch": 0.8899143045484509,
      "grad_norm": 0.6060967445373535,
      "learning_rate": 0.00019118344299146235,
      "loss": 0.04,
      "step": 1350
    },
    {
      "epoch": 0.8965062623599209,
      "grad_norm": 0.7507016658782959,
      "learning_rate": 0.00019100351111642666,
      "loss": 0.0557,
      "step": 1360
    },
    {
      "epoch": 0.9030982201713909,
      "grad_norm": 0.4493657648563385,
      "learning_rate": 0.00019082184810862698,
      "loss": 0.0424,
      "step": 1370
    },
    {
      "epoch": 0.9096901779828609,
      "grad_norm": 0.5429974794387817,
      "learning_rate": 0.00019063845742378467,
      "loss": 0.0441,
      "step": 1380
    },
    {
      "epoch": 0.9162821357943309,
      "grad_norm": 0.43085166811943054,
      "learning_rate": 0.00019045334255048634,
      "loss": 0.046,
      "step": 1390
    },
    {
      "epoch": 0.922874093605801,
      "grad_norm": 0.41755935549736023,
      "learning_rate": 0.0001902665070101172,
      "loss": 0.0461,
      "step": 1400
    },
    {
      "epoch": 0.9294660514172709,
      "grad_norm": 0.44052428007125854,
      "learning_rate": 0.00019007795435679428,
      "loss": 0.052,
      "step": 1410
    },
    {
      "epoch": 0.9360580092287409,
      "grad_norm": 0.4310389757156372,
      "learning_rate": 0.00018988768817729864,
      "loss": 0.0442,
      "step": 1420
    },
    {
      "epoch": 0.942649967040211,
      "grad_norm": 0.3892590403556824,
      "learning_rate": 0.0001896957120910074,
      "loss": 0.0416,
      "step": 1430
    },
    {
      "epoch": 0.9492419248516809,
      "grad_norm": 0.7788804769515991,
      "learning_rate": 0.00018950202974982454,
      "loss": 0.0339,
      "step": 1440
    },
    {
      "epoch": 0.955833882663151,
      "grad_norm": 0.5524693727493286,
      "learning_rate": 0.00018930664483811173,
      "loss": 0.045,
      "step": 1450
    },
    {
      "epoch": 0.962425840474621,
      "grad_norm": 0.41249391436576843,
      "learning_rate": 0.00018910956107261816,
      "loss": 0.0381,
      "step": 1460
    },
    {
      "epoch": 0.9690177982860909,
      "grad_norm": 0.3245869576931,
      "learning_rate": 0.00018891078220240973,
      "loss": 0.0277,
      "step": 1470
    },
    {
      "epoch": 0.975609756097561,
      "grad_norm": 0.28615134954452515,
      "learning_rate": 0.0001887103120087979,
      "loss": 0.0365,
      "step": 1480
    },
    {
      "epoch": 0.982201713909031,
      "grad_norm": 0.32258233428001404,
      "learning_rate": 0.00018850815430526758,
      "loss": 0.0339,
      "step": 1490
    },
    {
      "epoch": 0.988793671720501,
      "grad_norm": 0.4749410152435303,
      "learning_rate": 0.00018830431293740473,
      "loss": 0.0414,
      "step": 1500
    },
    {
      "epoch": 0.995385629531971,
      "grad_norm": 0.44143855571746826,
      "learning_rate": 0.00018809879178282313,
      "loss": 0.0288,
      "step": 1510
    },
    {
      "epoch": 1.0019775873434411,
      "grad_norm": 0.4565713107585907,
      "learning_rate": 0.00018789159475109067,
      "loss": 0.0343,
      "step": 1520
    },
    {
      "epoch": 1.008569545154911,
      "grad_norm": 0.5609179735183716,
      "learning_rate": 0.000187682725783655,
      "loss": 0.0423,
      "step": 1530
    },
    {
      "epoch": 1.015161502966381,
      "grad_norm": 0.4169975221157074,
      "learning_rate": 0.00018747218885376842,
      "loss": 0.0341,
      "step": 1540
    },
    {
      "epoch": 1.0217534607778511,
      "grad_norm": 0.44291096925735474,
      "learning_rate": 0.0001872599879664124,
      "loss": 0.0435,
      "step": 1550
    },
    {
      "epoch": 1.028345418589321,
      "grad_norm": 0.31878435611724854,
      "learning_rate": 0.00018704612715822144,
      "loss": 0.0402,
      "step": 1560
    },
    {
      "epoch": 1.034937376400791,
      "grad_norm": 0.4876072406768799,
      "learning_rate": 0.0001868306104974061,
      "loss": 0.0298,
      "step": 1570
    },
    {
      "epoch": 1.0415293342122611,
      "grad_norm": 0.4452480375766754,
      "learning_rate": 0.0001866134420836759,
      "loss": 0.042,
      "step": 1580
    },
    {
      "epoch": 1.048121292023731,
      "grad_norm": 0.5295068025588989,
      "learning_rate": 0.00018639462604816103,
      "loss": 0.0408,
      "step": 1590
    },
    {
      "epoch": 1.054713249835201,
      "grad_norm": 0.349461168050766,
      "learning_rate": 0.00018617416655333395,
      "loss": 0.037,
      "step": 1600
    },
    {
      "epoch": 1.0613052076466711,
      "grad_norm": 0.39832666516304016,
      "learning_rate": 0.00018595206779293015,
      "loss": 0.0406,
      "step": 1610
    },
    {
      "epoch": 1.067897165458141,
      "grad_norm": 0.5740079283714294,
      "learning_rate": 0.00018572833399186836,
      "loss": 0.0411,
      "step": 1620
    },
    {
      "epoch": 1.074489123269611,
      "grad_norm": 0.20162849128246307,
      "learning_rate": 0.00018550296940617034,
      "loss": 0.0333,
      "step": 1630
    },
    {
      "epoch": 1.0810810810810811,
      "grad_norm": 0.40781688690185547,
      "learning_rate": 0.00018527597832287954,
      "loss": 0.036,
      "step": 1640
    },
    {
      "epoch": 1.087673038892551,
      "grad_norm": 0.2796386182308197,
      "learning_rate": 0.00018504736505997997,
      "loss": 0.0313,
      "step": 1650
    },
    {
      "epoch": 1.094264996704021,
      "grad_norm": 0.6502156853675842,
      "learning_rate": 0.00018481713396631383,
      "loss": 0.0428,
      "step": 1660
    },
    {
      "epoch": 1.1008569545154911,
      "grad_norm": 0.3565762937068939,
      "learning_rate": 0.00018458528942149886,
      "loss": 0.0363,
      "step": 1670
    },
    {
      "epoch": 1.107448912326961,
      "grad_norm": 0.2560652792453766,
      "learning_rate": 0.00018435183583584498,
      "loss": 0.0404,
      "step": 1680
    },
    {
      "epoch": 1.1140408701384312,
      "grad_norm": 0.4972442388534546,
      "learning_rate": 0.00018411677765027036,
      "loss": 0.053,
      "step": 1690
    },
    {
      "epoch": 1.1206328279499012,
      "grad_norm": 0.36633139848709106,
      "learning_rate": 0.0001838801193362171,
      "loss": 0.0363,
      "step": 1700
    },
    {
      "epoch": 1.127224785761371,
      "grad_norm": 0.4480843245983124,
      "learning_rate": 0.000183641865395566,
      "loss": 0.031,
      "step": 1710
    },
    {
      "epoch": 1.133816743572841,
      "grad_norm": 0.42788198590278625,
      "learning_rate": 0.00018340202036055102,
      "loss": 0.0408,
      "step": 1720
    },
    {
      "epoch": 1.1404087013843112,
      "grad_norm": 0.3363877534866333,
      "learning_rate": 0.00018316058879367303,
      "loss": 0.0431,
      "step": 1730
    },
    {
      "epoch": 1.147000659195781,
      "grad_norm": 0.48484691977500916,
      "learning_rate": 0.000182917575287613,
      "loss": 0.0497,
      "step": 1740
    },
    {
      "epoch": 1.1535926170072512,
      "grad_norm": 0.4944576025009155,
      "learning_rate": 0.00018267298446514473,
      "loss": 0.0381,
      "step": 1750
    },
    {
      "epoch": 1.1601845748187212,
      "grad_norm": 0.31334227323532104,
      "learning_rate": 0.00018242682097904673,
      "loss": 0.0374,
      "step": 1760
    },
    {
      "epoch": 1.166776532630191,
      "grad_norm": 0.4245593845844269,
      "learning_rate": 0.00018217908951201394,
      "loss": 0.0384,
      "step": 1770
    },
    {
      "epoch": 1.1733684904416612,
      "grad_norm": 0.3156047463417053,
      "learning_rate": 0.00018192979477656845,
      "loss": 0.0375,
      "step": 1780
    },
    {
      "epoch": 1.1799604482531312,
      "grad_norm": 0.38936617970466614,
      "learning_rate": 0.00018167894151497,
      "loss": 0.0383,
      "step": 1790
    },
    {
      "epoch": 1.186552406064601,
      "grad_norm": 0.39287203550338745,
      "learning_rate": 0.00018142653449912564,
      "loss": 0.0384,
      "step": 1800
    },
    {
      "epoch": 1.1931443638760713,
      "grad_norm": 0.4132576882839203,
      "learning_rate": 0.0001811725785304991,
      "loss": 0.0333,
      "step": 1810
    },
    {
      "epoch": 1.1997363216875412,
      "grad_norm": 0.42320823669433594,
      "learning_rate": 0.00018091707844001935,
      "loss": 0.0282,
      "step": 1820
    },
    {
      "epoch": 1.2063282794990111,
      "grad_norm": 0.4071812927722931,
      "learning_rate": 0.00018066003908798873,
      "loss": 0.0315,
      "step": 1830
    },
    {
      "epoch": 1.2129202373104813,
      "grad_norm": 0.40392544865608215,
      "learning_rate": 0.0001804014653639904,
      "loss": 0.0331,
      "step": 1840
    },
    {
      "epoch": 1.2195121951219512,
      "grad_norm": 0.4608232080936432,
      "learning_rate": 0.00018014136218679567,
      "loss": 0.0327,
      "step": 1850
    },
    {
      "epoch": 1.2261041529334213,
      "grad_norm": 0.5048249959945679,
      "learning_rate": 0.00017987973450426994,
      "loss": 0.0334,
      "step": 1860
    },
    {
      "epoch": 1.2326961107448913,
      "grad_norm": 0.5134670734405518,
      "learning_rate": 0.0001796165872932789,
      "loss": 0.0361,
      "step": 1870
    },
    {
      "epoch": 1.2392880685563612,
      "grad_norm": 0.339224249124527,
      "learning_rate": 0.00017935192555959385,
      "loss": 0.0336,
      "step": 1880
    },
    {
      "epoch": 1.2458800263678311,
      "grad_norm": 0.5917630195617676,
      "learning_rate": 0.0001790857543377963,
      "loss": 0.0447,
      "step": 1890
    },
    {
      "epoch": 1.2524719841793013,
      "grad_norm": 0.641945481300354,
      "learning_rate": 0.00017881807869118234,
      "loss": 0.0546,
      "step": 1900
    },
    {
      "epoch": 1.2590639419907712,
      "grad_norm": 0.4399726986885071,
      "learning_rate": 0.00017854890371166637,
      "loss": 0.0358,
      "step": 1910
    },
    {
      "epoch": 1.2656558998022414,
      "grad_norm": 0.32603511214256287,
      "learning_rate": 0.00017827823451968398,
      "loss": 0.0342,
      "step": 1920
    },
    {
      "epoch": 1.2722478576137113,
      "grad_norm": 0.659220814704895,
      "learning_rate": 0.0001780060762640949,
      "loss": 0.039,
      "step": 1930
    },
    {
      "epoch": 1.2788398154251812,
      "grad_norm": 0.4240771234035492,
      "learning_rate": 0.00017773243412208474,
      "loss": 0.035,
      "step": 1940
    },
    {
      "epoch": 1.2854317732366514,
      "grad_norm": 0.4172196090221405,
      "learning_rate": 0.0001774573132990667,
      "loss": 0.0379,
      "step": 1950
    },
    {
      "epoch": 1.2920237310481213,
      "grad_norm": 0.42398178577423096,
      "learning_rate": 0.00017718071902858256,
      "loss": 0.0373,
      "step": 1960
    },
    {
      "epoch": 1.2986156888595912,
      "grad_norm": 0.5154095888137817,
      "learning_rate": 0.00017690265657220288,
      "loss": 0.0403,
      "step": 1970
    },
    {
      "epoch": 1.3052076466710614,
      "grad_norm": 0.396801233291626,
      "learning_rate": 0.00017662313121942727,
      "loss": 0.0391,
      "step": 1980
    },
    {
      "epoch": 1.3117996044825313,
      "grad_norm": 0.4826532006263733,
      "learning_rate": 0.00017634214828758342,
      "loss": 0.0297,
      "step": 1990
    },
    {
      "epoch": 1.3183915622940012,
      "grad_norm": 0.508990466594696,
      "learning_rate": 0.00017605971312172622,
      "loss": 0.0378,
      "step": 2000
    },
    {
      "epoch": 1.3249835201054714,
      "grad_norm": 0.3308925926685333,
      "learning_rate": 0.000175775831094536,
      "loss": 0.0379,
      "step": 2010
    },
    {
      "epoch": 1.3315754779169413,
      "grad_norm": 0.4720020294189453,
      "learning_rate": 0.00017549050760621614,
      "loss": 0.0392,
      "step": 2020
    },
    {
      "epoch": 1.3381674357284115,
      "grad_norm": 0.6246912479400635,
      "learning_rate": 0.00017520374808439076,
      "loss": 0.0363,
      "step": 2030
    },
    {
      "epoch": 1.3447593935398814,
      "grad_norm": 0.33079174160957336,
      "learning_rate": 0.00017491555798400095,
      "loss": 0.0316,
      "step": 2040
    },
    {
      "epoch": 1.3513513513513513,
      "grad_norm": 0.2520120143890381,
      "learning_rate": 0.00017462594278720145,
      "loss": 0.0325,
      "step": 2050
    },
    {
      "epoch": 1.3579433091628212,
      "grad_norm": 0.23862145841121674,
      "learning_rate": 0.00017433490800325614,
      "loss": 0.0351,
      "step": 2060
    },
    {
      "epoch": 1.3645352669742914,
      "grad_norm": 0.3477911353111267,
      "learning_rate": 0.00017404245916843324,
      "loss": 0.0389,
      "step": 2070
    },
    {
      "epoch": 1.3711272247857613,
      "grad_norm": 0.5003520846366882,
      "learning_rate": 0.00017374860184590015,
      "loss": 0.0368,
      "step": 2080
    },
    {
      "epoch": 1.3777191825972315,
      "grad_norm": 0.3755623698234558,
      "learning_rate": 0.00017345334162561734,
      "loss": 0.0341,
      "step": 2090
    },
    {
      "epoch": 1.3843111404087014,
      "grad_norm": 0.5258712768554688,
      "learning_rate": 0.00017315668412423238,
      "loss": 0.0334,
      "step": 2100
    },
    {
      "epoch": 1.3909030982201713,
      "grad_norm": 0.567348062992096,
      "learning_rate": 0.0001728586349849728,
      "loss": 0.0366,
      "step": 2110
    },
    {
      "epoch": 1.3974950560316415,
      "grad_norm": 0.4541948139667511,
      "learning_rate": 0.00017255919987753878,
      "loss": 0.0503,
      "step": 2120
    },
    {
      "epoch": 1.4040870138431114,
      "grad_norm": 0.44722017645835876,
      "learning_rate": 0.0001722583844979955,
      "loss": 0.0433,
      "step": 2130
    },
    {
      "epoch": 1.4106789716545813,
      "grad_norm": 0.25077545642852783,
      "learning_rate": 0.0001719561945686646,
      "loss": 0.0345,
      "step": 2140
    },
    {
      "epoch": 1.4172709294660515,
      "grad_norm": 0.3619667887687683,
      "learning_rate": 0.00017165263583801535,
      "loss": 0.0325,
      "step": 2150
    },
    {
      "epoch": 1.4238628872775214,
      "grad_norm": 0.6268120408058167,
      "learning_rate": 0.0001713477140805553,
      "loss": 0.0364,
      "step": 2160
    },
    {
      "epoch": 1.4304548450889913,
      "grad_norm": 0.5806043148040771,
      "learning_rate": 0.0001710414350967204,
      "loss": 0.037,
      "step": 2170
    },
    {
      "epoch": 1.4370468029004615,
      "grad_norm": 0.3783499002456665,
      "learning_rate": 0.00017073380471276496,
      "loss": 0.0318,
      "step": 2180
    },
    {
      "epoch": 1.4436387607119314,
      "grad_norm": 0.45143669843673706,
      "learning_rate": 0.0001704248287806503,
      "loss": 0.0344,
      "step": 2190
    },
    {
      "epoch": 1.4502307185234016,
      "grad_norm": 0.3384231626987457,
      "learning_rate": 0.00017011451317793384,
      "loss": 0.0306,
      "step": 2200
    },
    {
      "epoch": 1.4568226763348715,
      "grad_norm": 0.45972728729248047,
      "learning_rate": 0.00016980286380765714,
      "loss": 0.0394,
      "step": 2210
    },
    {
      "epoch": 1.4634146341463414,
      "grad_norm": 0.31935372948646545,
      "learning_rate": 0.0001694898865982336,
      "loss": 0.0327,
      "step": 2220
    },
    {
      "epoch": 1.4700065919578114,
      "grad_norm": 0.3758127689361572,
      "learning_rate": 0.0001691755875033357,
      "loss": 0.0376,
      "step": 2230
    },
    {
      "epoch": 1.4765985497692815,
      "grad_norm": 0.7778825759887695,
      "learning_rate": 0.00016885997250178184,
      "loss": 0.0346,
      "step": 2240
    },
    {
      "epoch": 1.4831905075807514,
      "grad_norm": 0.7735721468925476,
      "learning_rate": 0.00016854304759742237,
      "loss": 0.038,
      "step": 2250
    },
    {
      "epoch": 1.4897824653922216,
      "grad_norm": 0.6678999662399292,
      "learning_rate": 0.00016822481881902568,
      "loss": 0.0488,
      "step": 2260
    },
    {
      "epoch": 1.4963744232036915,
      "grad_norm": 0.5145410895347595,
      "learning_rate": 0.00016790529222016328,
      "loss": 0.0423,
      "step": 2270
    },
    {
      "epoch": 1.5029663810151614,
      "grad_norm": 1.2216230630874634,
      "learning_rate": 0.00016758447387909474,
      "loss": 0.0435,
      "step": 2280
    },
    {
      "epoch": 1.5095583388266314,
      "grad_norm": 0.46562644839286804,
      "learning_rate": 0.00016726236989865213,
      "loss": 0.0329,
      "step": 2290
    },
    {
      "epoch": 1.5161502966381015,
      "grad_norm": 0.552429735660553,
      "learning_rate": 0.00016693898640612382,
      "loss": 0.041,
      "step": 2300
    },
    {
      "epoch": 1.5227422544495717,
      "grad_norm": 0.4718281328678131,
      "learning_rate": 0.00016661432955313789,
      "loss": 0.0317,
      "step": 2310
    },
    {
      "epoch": 1.5293342122610416,
      "grad_norm": 0.5447438955307007,
      "learning_rate": 0.00016628840551554522,
      "loss": 0.0365,
      "step": 2320
    },
    {
      "epoch": 1.5359261700725115,
      "grad_norm": 0.5384830236434937,
      "learning_rate": 0.00016596122049330206,
      "loss": 0.0365,
      "step": 2330
    },
    {
      "epoch": 1.5425181278839815,
      "grad_norm": 0.48313167691230774,
      "learning_rate": 0.0001656327807103518,
      "loss": 0.0381,
      "step": 2340
    },
    {
      "epoch": 1.5491100856954514,
      "grad_norm": 0.4898654520511627,
      "learning_rate": 0.000165303092414507,
      "loss": 0.0343,
      "step": 2350
    },
    {
      "epoch": 1.5557020435069215,
      "grad_norm": 0.47862598299980164,
      "learning_rate": 0.00016497216187733016,
      "loss": 0.0333,
      "step": 2360
    },
    {
      "epoch": 1.5622940013183917,
      "grad_norm": 0.4709709584712982,
      "learning_rate": 0.00016463999539401454,
      "loss": 0.0351,
      "step": 2370
    },
    {
      "epoch": 1.5688859591298616,
      "grad_norm": 0.5032598972320557,
      "learning_rate": 0.00016430659928326458,
      "loss": 0.0306,
      "step": 2380
    },
    {
      "epoch": 1.5754779169413315,
      "grad_norm": 0.9953115582466125,
      "learning_rate": 0.00016397197988717542,
      "loss": 0.0388,
      "step": 2390
    },
    {
      "epoch": 1.5820698747528015,
      "grad_norm": 0.5729079246520996,
      "learning_rate": 0.00016363614357111245,
      "loss": 0.0336,
      "step": 2400
    },
    {
      "epoch": 1.5886618325642716,
      "grad_norm": 0.8332236409187317,
      "learning_rate": 0.0001632990967235902,
      "loss": 0.0414,
      "step": 2410
    },
    {
      "epoch": 1.5952537903757416,
      "grad_norm": 1.0546754598617554,
      "learning_rate": 0.00016296084575615077,
      "loss": 0.0383,
      "step": 2420
    },
    {
      "epoch": 1.6018457481872117,
      "grad_norm": 0.546684205532074,
      "learning_rate": 0.0001626213971032418,
      "loss": 0.0382,
      "step": 2430
    },
    {
      "epoch": 1.6084377059986816,
      "grad_norm": 0.6224532723426819,
      "learning_rate": 0.00016228075722209422,
      "loss": 0.0379,
      "step": 2440
    },
    {
      "epoch": 1.6150296638101516,
      "grad_norm": 0.39089900255203247,
      "learning_rate": 0.00016193893259259934,
      "loss": 0.0364,
      "step": 2450
    },
    {
      "epoch": 1.6216216216216215,
      "grad_norm": 0.5209794044494629,
      "learning_rate": 0.00016159592971718548,
      "loss": 0.0329,
      "step": 2460
    },
    {
      "epoch": 1.6282135794330916,
      "grad_norm": 0.45939525961875916,
      "learning_rate": 0.0001612517551206946,
      "loss": 0.0316,
      "step": 2470
    },
    {
      "epoch": 1.6348055372445618,
      "grad_norm": 0.4331035614013672,
      "learning_rate": 0.00016090641535025774,
      "loss": 0.0424,
      "step": 2480
    },
    {
      "epoch": 1.6413974950560317,
      "grad_norm": 0.447710782289505,
      "learning_rate": 0.0001605599169751708,
      "loss": 0.0387,
      "step": 2490
    },
    {
      "epoch": 1.6479894528675016,
      "grad_norm": 0.4073365330696106,
      "learning_rate": 0.00016021226658676947,
      "loss": 0.0404,
      "step": 2500
    },
    {
      "epoch": 1.6545814106789716,
      "grad_norm": 0.36032500863075256,
      "learning_rate": 0.00015986347079830382,
      "loss": 0.0311,
      "step": 2510
    },
    {
      "epoch": 1.6611733684904415,
      "grad_norm": 0.23349802196025848,
      "learning_rate": 0.00015951353624481257,
      "loss": 0.0248,
      "step": 2520
    },
    {
      "epoch": 1.6677653263019117,
      "grad_norm": 0.3381997048854828,
      "learning_rate": 0.0001591624695829968,
      "loss": 0.0316,
      "step": 2530
    },
    {
      "epoch": 1.6743572841133818,
      "grad_norm": 0.39666473865509033,
      "learning_rate": 0.0001588102774910933,
      "loss": 0.0399,
      "step": 2540
    },
    {
      "epoch": 1.6809492419248517,
      "grad_norm": 0.38981807231903076,
      "learning_rate": 0.00015845696666874772,
      "loss": 0.0325,
      "step": 2550
    },
    {
      "epoch": 1.6875411997363217,
      "grad_norm": 0.614475667476654,
      "learning_rate": 0.00015810254383688682,
      "loss": 0.0386,
      "step": 2560
    },
    {
      "epoch": 1.6941331575477916,
      "grad_norm": 0.6012241244316101,
      "learning_rate": 0.0001577470157375909,
      "loss": 0.0426,
      "step": 2570
    },
    {
      "epoch": 1.7007251153592617,
      "grad_norm": 0.8984513878822327,
      "learning_rate": 0.00015739038913396546,
      "loss": 0.0385,
      "step": 2580
    },
    {
      "epoch": 1.7073170731707317,
      "grad_norm": 0.5758917331695557,
      "learning_rate": 0.00015703267081001237,
      "loss": 0.0327,
      "step": 2590
    },
    {
      "epoch": 1.7139090309822018,
      "grad_norm": 0.39728182554244995,
      "learning_rate": 0.00015667386757050106,
      "loss": 0.0359,
      "step": 2600
    },
    {
      "epoch": 1.7205009887936717,
      "grad_norm": 0.44694146513938904,
      "learning_rate": 0.00015631398624083907,
      "loss": 0.032,
      "step": 2610
    },
    {
      "epoch": 1.7270929466051417,
      "grad_norm": 0.5872260332107544,
      "learning_rate": 0.000155953033666942,
      "loss": 0.0307,
      "step": 2620
    },
    {
      "epoch": 1.7336849044166116,
      "grad_norm": 0.5661513209342957,
      "learning_rate": 0.00015559101671510349,
      "loss": 0.0326,
      "step": 2630
    },
    {
      "epoch": 1.7402768622280818,
      "grad_norm": 0.3842809796333313,
      "learning_rate": 0.00015522794227186443,
      "loss": 0.0326,
      "step": 2640
    },
    {
      "epoch": 1.746868820039552,
      "grad_norm": 0.24816927313804626,
      "learning_rate": 0.00015486381724388222,
      "loss": 0.0251,
      "step": 2650
    },
    {
      "epoch": 1.7534607778510218,
      "grad_norm": 0.2353767305612564,
      "learning_rate": 0.00015449864855779903,
      "loss": 0.0272,
      "step": 2660
    },
    {
      "epoch": 1.7600527356624918,
      "grad_norm": 0.25328564643859863,
      "learning_rate": 0.00015413244316011038,
      "loss": 0.0338,
      "step": 2670
    },
    {
      "epoch": 1.7666446934739617,
      "grad_norm": 0.37852951884269714,
      "learning_rate": 0.0001537652080170328,
      "loss": 0.0308,
      "step": 2680
    },
    {
      "epoch": 1.7732366512854316,
      "grad_norm": 0.294085294008255,
      "learning_rate": 0.00015339695011437127,
      "loss": 0.0236,
      "step": 2690
    },
    {
      "epoch": 1.7798286090969018,
      "grad_norm": 0.3499051034450531,
      "learning_rate": 0.00015302767645738655,
      "loss": 0.0305,
      "step": 2700
    },
    {
      "epoch": 1.786420566908372,
      "grad_norm": 0.4269741177558899,
      "learning_rate": 0.00015265739407066176,
      "loss": 0.0279,
      "step": 2710
    },
    {
      "epoch": 1.7930125247198418,
      "grad_norm": 0.3368455767631531,
      "learning_rate": 0.00015228610999796875,
      "loss": 0.0306,
      "step": 2720
    },
    {
      "epoch": 1.7996044825313118,
      "grad_norm": 0.36064472794532776,
      "learning_rate": 0.00015191383130213417,
      "loss": 0.0281,
      "step": 2730
    },
    {
      "epoch": 1.8061964403427817,
      "grad_norm": 0.42101433873176575,
      "learning_rate": 0.00015154056506490505,
      "loss": 0.0299,
      "step": 2740
    },
    {
      "epoch": 1.8127883981542519,
      "grad_norm": 0.3719172179698944,
      "learning_rate": 0.0001511663183868142,
      "loss": 0.0323,
      "step": 2750
    },
    {
      "epoch": 1.8193803559657218,
      "grad_norm": 0.3902226984500885,
      "learning_rate": 0.00015079109838704504,
      "loss": 0.0327,
      "step": 2760
    },
    {
      "epoch": 1.825972313777192,
      "grad_norm": 0.36405107378959656,
      "learning_rate": 0.00015041491220329616,
      "loss": 0.0278,
      "step": 2770
    },
    {
      "epoch": 1.8325642715886619,
      "grad_norm": 0.31391507387161255,
      "learning_rate": 0.0001500377669916456,
      "loss": 0.0325,
      "step": 2780
    },
    {
      "epoch": 1.8391562294001318,
      "grad_norm": 0.4089469611644745,
      "learning_rate": 0.0001496596699264147,
      "loss": 0.0253,
      "step": 2790
    },
    {
      "epoch": 1.8457481872116017,
      "grad_norm": 0.5822712779045105,
      "learning_rate": 0.00014928062820003166,
      "loss": 0.0337,
      "step": 2800
    },
    {
      "epoch": 1.8523401450230719,
      "grad_norm": 0.5532752275466919,
      "learning_rate": 0.00014890064902289466,
      "loss": 0.0316,
      "step": 2810
    },
    {
      "epoch": 1.858932102834542,
      "grad_norm": 0.39222195744514465,
      "learning_rate": 0.0001485197396232348,
      "loss": 0.0304,
      "step": 2820
    },
    {
      "epoch": 1.865524060646012,
      "grad_norm": 0.3746655285358429,
      "learning_rate": 0.00014813790724697832,
      "loss": 0.0361,
      "step": 2830
    },
    {
      "epoch": 1.8721160184574819,
      "grad_norm": 0.5020349621772766,
      "learning_rate": 0.0001477551591576092,
      "loss": 0.0351,
      "step": 2840
    },
    {
      "epoch": 1.8787079762689518,
      "grad_norm": 0.40259358286857605,
      "learning_rate": 0.00014737150263603063,
      "loss": 0.027,
      "step": 2850
    },
    {
      "epoch": 1.8852999340804217,
      "grad_norm": 0.6693785190582275,
      "learning_rate": 0.00014698694498042675,
      "loss": 0.0345,
      "step": 2860
    },
    {
      "epoch": 1.8918918918918919,
      "grad_norm": 0.6384851932525635,
      "learning_rate": 0.00014660149350612353,
      "loss": 0.0315,
      "step": 2870
    },
    {
      "epoch": 1.898483849703362,
      "grad_norm": 0.5224544405937195,
      "learning_rate": 0.00014621515554544997,
      "loss": 0.0259,
      "step": 2880
    },
    {
      "epoch": 1.905075807514832,
      "grad_norm": 0.5825631022453308,
      "learning_rate": 0.0001458279384475983,
      "loss": 0.0415,
      "step": 2890
    },
    {
      "epoch": 1.911667765326302,
      "grad_norm": 0.36511966586112976,
      "learning_rate": 0.0001454398495784844,
      "loss": 0.033,
      "step": 2900
    },
    {
      "epoch": 1.9182597231377718,
      "grad_norm": 0.4093778431415558,
      "learning_rate": 0.00014505089632060753,
      "loss": 0.0309,
      "step": 2910
    },
    {
      "epoch": 1.924851680949242,
      "grad_norm": 0.4290638566017151,
      "learning_rate": 0.00014466108607291003,
      "loss": 0.0309,
      "step": 2920
    },
    {
      "epoch": 1.931443638760712,
      "grad_norm": 0.6213640570640564,
      "learning_rate": 0.00014427042625063646,
      "loss": 0.0358,
      "step": 2930
    },
    {
      "epoch": 1.938035596572182,
      "grad_norm": 0.6244672536849976,
      "learning_rate": 0.00014387892428519258,
      "loss": 0.0387,
      "step": 2940
    },
    {
      "epoch": 1.944627554383652,
      "grad_norm": 0.380691796541214,
      "learning_rate": 0.000143486587624004,
      "loss": 0.0464,
      "step": 2950
    },
    {
      "epoch": 1.951219512195122,
      "grad_norm": 0.4133692979812622,
      "learning_rate": 0.00014309342373037455,
      "loss": 0.0329,
      "step": 2960
    },
    {
      "epoch": 1.9578114700065918,
      "grad_norm": 0.4502374529838562,
      "learning_rate": 0.00014269944008334418,
      "loss": 0.0334,
      "step": 2970
    },
    {
      "epoch": 1.964403427818062,
      "grad_norm": 0.5235921740531921,
      "learning_rate": 0.00014230464417754675,
      "loss": 0.033,
      "step": 2980
    },
    {
      "epoch": 1.9709953856295321,
      "grad_norm": 0.5345565676689148,
      "learning_rate": 0.00014190904352306757,
      "loss": 0.0371,
      "step": 2990
    },
    {
      "epoch": 1.977587343441002,
      "grad_norm": 0.34067875146865845,
      "learning_rate": 0.0001415126456453004,
      "loss": 0.0408,
      "step": 3000
    },
    {
      "epoch": 1.984179301252472,
      "grad_norm": 0.36922353506088257,
      "learning_rate": 0.00014111545808480434,
      "loss": 0.0315,
      "step": 3010
    },
    {
      "epoch": 1.990771259063942,
      "grad_norm": 0.36315643787384033,
      "learning_rate": 0.0001407174883971604,
      "loss": 0.0311,
      "step": 3020
    },
    {
      "epoch": 1.9973632168754119,
      "grad_norm": 0.35053545236587524,
      "learning_rate": 0.0001403187441528277,
      "loss": 0.0367,
      "step": 3030
    },
    {
      "epoch": 2.0039551746868822,
      "grad_norm": 0.5017916560173035,
      "learning_rate": 0.00013991923293699956,
      "loss": 0.0353,
      "step": 3040
    },
    {
      "epoch": 2.010547132498352,
      "grad_norm": 0.3657391667366028,
      "learning_rate": 0.00013951896234945925,
      "loss": 0.0404,
      "step": 3050
    },
    {
      "epoch": 2.017139090309822,
      "grad_norm": 0.5382429957389832,
      "learning_rate": 0.00013911794000443528,
      "loss": 0.0346,
      "step": 3060
    },
    {
      "epoch": 2.023731048121292,
      "grad_norm": 0.5115209221839905,
      "learning_rate": 0.0001387161735304566,
      "loss": 0.0288,
      "step": 3070
    },
    {
      "epoch": 2.030323005932762,
      "grad_norm": 0.5078955888748169,
      "learning_rate": 0.00013831367057020748,
      "loss": 0.0323,
      "step": 3080
    },
    {
      "epoch": 2.036914963744232,
      "grad_norm": 0.4034331440925598,
      "learning_rate": 0.00013791043878038224,
      "loss": 0.0397,
      "step": 3090
    },
    {
      "epoch": 2.0435069215557022,
      "grad_norm": 0.23669302463531494,
      "learning_rate": 0.0001375064858315394,
      "loss": 0.0314,
      "step": 3100
    },
    {
      "epoch": 2.050098879367172,
      "grad_norm": 0.3059588074684143,
      "learning_rate": 0.000137101819407956,
      "loss": 0.0276,
      "step": 3110
    },
    {
      "epoch": 2.056690837178642,
      "grad_norm": 0.5819403529167175,
      "learning_rate": 0.00013669644720748118,
      "loss": 0.0285,
      "step": 3120
    },
    {
      "epoch": 2.063282794990112,
      "grad_norm": 0.6815973520278931,
      "learning_rate": 0.00013629037694138995,
      "loss": 0.0329,
      "step": 3130
    },
    {
      "epoch": 2.069874752801582,
      "grad_norm": 0.28361934423446655,
      "learning_rate": 0.0001358836163342364,
      "loss": 0.0271,
      "step": 3140
    },
    {
      "epoch": 2.076466710613052,
      "grad_norm": 0.2907734513282776,
      "learning_rate": 0.00013547617312370663,
      "loss": 0.0309,
      "step": 3150
    },
    {
      "epoch": 2.0830586684245223,
      "grad_norm": 0.5272607207298279,
      "learning_rate": 0.00013506805506047198,
      "loss": 0.0308,
      "step": 3160
    },
    {
      "epoch": 2.089650626235992,
      "grad_norm": 0.23821255564689636,
      "learning_rate": 0.00013465926990804107,
      "loss": 0.0341,
      "step": 3170
    },
    {
      "epoch": 2.096242584047462,
      "grad_norm": 0.5370649099349976,
      "learning_rate": 0.00013424982544261248,
      "loss": 0.0316,
      "step": 3180
    },
    {
      "epoch": 2.102834541858932,
      "grad_norm": 0.3361760675907135,
      "learning_rate": 0.00013383972945292665,
      "loss": 0.0248,
      "step": 3190
    },
    {
      "epoch": 2.109426499670402,
      "grad_norm": 0.48819541931152344,
      "learning_rate": 0.00013342898974011774,
      "loss": 0.0347,
      "step": 3200
    },
    {
      "epoch": 2.1160184574818723,
      "grad_norm": 0.24430608749389648,
      "learning_rate": 0.00013301761411756543,
      "loss": 0.0269,
      "step": 3210
    },
    {
      "epoch": 2.1226104152933423,
      "grad_norm": 0.4588664770126343,
      "learning_rate": 0.00013260561041074598,
      "loss": 0.0276,
      "step": 3220
    },
    {
      "epoch": 2.129202373104812,
      "grad_norm": 0.5559895634651184,
      "learning_rate": 0.0001321929864570835,
      "loss": 0.0257,
      "step": 3230
    },
    {
      "epoch": 2.135794330916282,
      "grad_norm": 0.547458827495575,
      "learning_rate": 0.00013177975010580085,
      "loss": 0.0223,
      "step": 3240
    },
    {
      "epoch": 2.142386288727752,
      "grad_norm": 0.3017808198928833,
      "learning_rate": 0.00013136590921777053,
      "loss": 0.031,
      "step": 3250
    },
    {
      "epoch": 2.148978246539222,
      "grad_norm": 0.44043952226638794,
      "learning_rate": 0.00013095147166536486,
      "loss": 0.0276,
      "step": 3260
    },
    {
      "epoch": 2.1555702043506924,
      "grad_norm": 0.4227822422981262,
      "learning_rate": 0.0001305364453323062,
      "loss": 0.0296,
      "step": 3270
    },
    {
      "epoch": 2.1621621621621623,
      "grad_norm": 0.4026118516921997,
      "learning_rate": 0.0001301208381135173,
      "loss": 0.0301,
      "step": 3280
    },
    {
      "epoch": 2.168754119973632,
      "grad_norm": 0.5354869961738586,
      "learning_rate": 0.0001297046579149708,
      "loss": 0.0286,
      "step": 3290
    },
    {
      "epoch": 2.175346077785102,
      "grad_norm": 0.42211246490478516,
      "learning_rate": 0.00012928791265353902,
      "loss": 0.0336,
      "step": 3300
    },
    {
      "epoch": 2.181938035596572,
      "grad_norm": 0.3645992577075958,
      "learning_rate": 0.00012887061025684333,
      "loss": 0.0242,
      "step": 3310
    },
    {
      "epoch": 2.188529993408042,
      "grad_norm": 0.2105298638343811,
      "learning_rate": 0.00012845275866310324,
      "loss": 0.0228,
      "step": 3320
    },
    {
      "epoch": 2.1951219512195124,
      "grad_norm": 0.25215044617652893,
      "learning_rate": 0.00012803436582098558,
      "loss": 0.0243,
      "step": 3330
    },
    {
      "epoch": 2.2017139090309823,
      "grad_norm": 0.4196263253688812,
      "learning_rate": 0.00012761543968945306,
      "loss": 0.0282,
      "step": 3340
    },
    {
      "epoch": 2.2083058668424522,
      "grad_norm": 0.1937485933303833,
      "learning_rate": 0.00012719598823761308,
      "loss": 0.0278,
      "step": 3350
    },
    {
      "epoch": 2.214897824653922,
      "grad_norm": 0.5221042037010193,
      "learning_rate": 0.00012677601944456604,
      "loss": 0.0311,
      "step": 3360
    },
    {
      "epoch": 2.221489782465392,
      "grad_norm": 0.2941031754016876,
      "learning_rate": 0.0001263555412992535,
      "loss": 0.0303,
      "step": 3370
    },
    {
      "epoch": 2.2280817402768625,
      "grad_norm": 0.31689217686653137,
      "learning_rate": 0.00012593456180030646,
      "loss": 0.0252,
      "step": 3380
    },
    {
      "epoch": 2.2346736980883324,
      "grad_norm": 0.42106205224990845,
      "learning_rate": 0.0001255130889558928,
      "loss": 0.0249,
      "step": 3390
    },
    {
      "epoch": 2.2412656558998023,
      "grad_norm": 0.576701283454895,
      "learning_rate": 0.0001250911307835653,
      "loss": 0.0303,
      "step": 3400
    },
    {
      "epoch": 2.2478576137112722,
      "grad_norm": 0.49954476952552795,
      "learning_rate": 0.00012466869531010895,
      "loss": 0.0323,
      "step": 3410
    },
    {
      "epoch": 2.254449571522742,
      "grad_norm": 0.4963241517543793,
      "learning_rate": 0.0001242457905713883,
      "loss": 0.0316,
      "step": 3420
    },
    {
      "epoch": 2.261041529334212,
      "grad_norm": 0.23066122829914093,
      "learning_rate": 0.00012382242461219452,
      "loss": 0.0226,
      "step": 3430
    },
    {
      "epoch": 2.267633487145682,
      "grad_norm": 0.540354311466217,
      "learning_rate": 0.00012339860548609262,
      "loss": 0.0365,
      "step": 3440
    },
    {
      "epoch": 2.2742254449571524,
      "grad_norm": 0.48116335272789,
      "learning_rate": 0.0001229743412552679,
      "loss": 0.0268,
      "step": 3450
    },
    {
      "epoch": 2.2808174027686223,
      "grad_norm": 0.4430583417415619,
      "learning_rate": 0.00012254963999037285,
      "loss": 0.0263,
      "step": 3460
    },
    {
      "epoch": 2.2874093605800923,
      "grad_norm": 0.42470598220825195,
      "learning_rate": 0.0001221245097703735,
      "loss": 0.0354,
      "step": 3470
    },
    {
      "epoch": 2.294001318391562,
      "grad_norm": 0.31455087661743164,
      "learning_rate": 0.00012169895868239574,
      "loss": 0.0241,
      "step": 3480
    },
    {
      "epoch": 2.300593276203032,
      "grad_norm": 0.3215204179286957,
      "learning_rate": 0.00012127299482157149,
      "loss": 0.0332,
      "step": 3490
    },
    {
      "epoch": 2.3071852340145025,
      "grad_norm": 0.3963293135166168,
      "learning_rate": 0.00012084662629088481,
      "loss": 0.025,
      "step": 3500
    },
    {
      "epoch": 2.3137771918259724,
      "grad_norm": 0.4304813742637634,
      "learning_rate": 0.00012041986120101764,
      "loss": 0.0354,
      "step": 3510
    },
    {
      "epoch": 2.3203691496374423,
      "grad_norm": 0.3873739242553711,
      "learning_rate": 0.00011999270767019553,
      "loss": 0.0277,
      "step": 3520
    },
    {
      "epoch": 2.3269611074489123,
      "grad_norm": 0.4315703809261322,
      "learning_rate": 0.00011956517382403321,
      "loss": 0.0301,
      "step": 3530
    },
    {
      "epoch": 2.333553065260382,
      "grad_norm": 0.4416598081588745,
      "learning_rate": 0.00011913726779538008,
      "loss": 0.0283,
      "step": 3540
    },
    {
      "epoch": 2.3401450230718526,
      "grad_norm": 0.3677782416343689,
      "learning_rate": 0.0001187089977241654,
      "loss": 0.0355,
      "step": 3550
    },
    {
      "epoch": 2.3467369808833225,
      "grad_norm": 0.4988672733306885,
      "learning_rate": 0.00011828037175724356,
      "loss": 0.0314,
      "step": 3560
    },
    {
      "epoch": 2.3533289386947924,
      "grad_norm": 0.4604177474975586,
      "learning_rate": 0.00011785139804823906,
      "loss": 0.0337,
      "step": 3570
    },
    {
      "epoch": 2.3599208965062624,
      "grad_norm": 0.3596359193325043,
      "learning_rate": 0.00011742208475739133,
      "loss": 0.0295,
      "step": 3580
    },
    {
      "epoch": 2.3665128543177323,
      "grad_norm": 0.16485251486301422,
      "learning_rate": 0.0001169924400513996,
      "loss": 0.0275,
      "step": 3590
    },
    {
      "epoch": 2.373104812129202,
      "grad_norm": 0.3272377550601959,
      "learning_rate": 0.00011656247210326748,
      "loss": 0.0305,
      "step": 3600
    },
    {
      "epoch": 2.379696769940672,
      "grad_norm": 0.32883545756340027,
      "learning_rate": 0.0001161321890921476,
      "loss": 0.0314,
      "step": 3610
    },
    {
      "epoch": 2.3862887277521425,
      "grad_norm": 0.49502697587013245,
      "learning_rate": 0.00011570159920318584,
      "loss": 0.0323,
      "step": 3620
    },
    {
      "epoch": 2.3928806855636124,
      "grad_norm": 0.3317064344882965,
      "learning_rate": 0.00011527071062736583,
      "loss": 0.0284,
      "step": 3630
    },
    {
      "epoch": 2.3994726433750824,
      "grad_norm": 0.29318150877952576,
      "learning_rate": 0.00011483953156135292,
      "loss": 0.0226,
      "step": 3640
    },
    {
      "epoch": 2.4060646011865523,
      "grad_norm": 0.48932701349258423,
      "learning_rate": 0.00011440807020733843,
      "loss": 0.0287,
      "step": 3650
    },
    {
      "epoch": 2.4126565589980222,
      "grad_norm": 0.358005166053772,
      "learning_rate": 0.00011397633477288359,
      "loss": 0.0235,
      "step": 3660
    },
    {
      "epoch": 2.4192485168094926,
      "grad_norm": 0.3554854691028595,
|
"learning_rate": 0.00011354433347076331, |
|
"loss": 0.0269, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 2.4258404746209625, |
|
"grad_norm": 0.3954286277294159, |
|
"learning_rate": 0.00011311207451881008, |
|
"loss": 0.0264, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 2.4324324324324325, |
|
"grad_norm": 0.3300182819366455, |
|
"learning_rate": 0.00011267956613975752, |
|
"loss": 0.0291, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 2.4390243902439024, |
|
"grad_norm": 0.22343868017196655, |
|
"learning_rate": 0.00011224681656108411, |
|
"loss": 0.0251, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.4456163480553723, |
|
"grad_norm": 0.3663915991783142, |
|
"learning_rate": 0.00011181383401485656, |
|
"loss": 0.0295, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 2.4522083058668427, |
|
"grad_norm": 0.39715585112571716, |
|
"learning_rate": 0.00011138062673757325, |
|
"loss": 0.0299, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 2.4588002636783126, |
|
"grad_norm": 0.3747979402542114, |
|
"learning_rate": 0.00011094720297000753, |
|
"loss": 0.0295, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 2.4653922214897825, |
|
"grad_norm": 0.2834596037864685, |
|
"learning_rate": 0.00011051357095705101, |
|
"loss": 0.0284, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 2.4719841793012525, |
|
"grad_norm": 0.3044513165950775, |
|
"learning_rate": 0.0001100797389475567, |
|
"loss": 0.0272, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.4785761371127224, |
|
"grad_norm": 0.39235764741897583, |
|
"learning_rate": 0.00010964571519418207, |
|
"loss": 0.024, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 2.4851680949241923, |
|
"grad_norm": 0.31392836570739746, |
|
"learning_rate": 0.00010921150795323207, |
|
"loss": 0.0229, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 2.4917600527356623, |
|
"grad_norm": 0.3227923512458801, |
|
"learning_rate": 0.00010877712548450207, |
|
"loss": 0.0235, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 2.4983520105471326, |
|
"grad_norm": 0.35434576869010925, |
|
"learning_rate": 0.00010834257605112079, |
|
"loss": 0.0265, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 2.5049439683586026, |
|
"grad_norm": 0.3610621988773346, |
|
"learning_rate": 0.00010790786791939301, |
|
"loss": 0.0286, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.5115359261700725, |
|
"grad_norm": 0.26061367988586426, |
|
"learning_rate": 0.00010747300935864243, |
|
"loss": 0.0302, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 2.5181278839815424, |
|
"grad_norm": 0.3455495536327362, |
|
"learning_rate": 0.00010703800864105429, |
|
"loss": 0.0283, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 2.5247198417930123, |
|
"grad_norm": 0.5354321002960205, |
|
"learning_rate": 0.00010660287404151807, |
|
"loss": 0.0279, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 2.5313117996044827, |
|
"grad_norm": 0.23394666612148285, |
|
"learning_rate": 0.00010616761383747, |
|
"loss": 0.0318, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 2.5379037574159526, |
|
"grad_norm": 0.3995780348777771, |
|
"learning_rate": 0.00010573223630873565, |
|
"loss": 0.0265, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.5444957152274226, |
|
"grad_norm": 0.4800235331058502, |
|
"learning_rate": 0.00010529674973737252, |
|
"loss": 0.0281, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 2.5510876730388925, |
|
"grad_norm": 0.2611030042171478, |
|
"learning_rate": 0.00010486116240751223, |
|
"loss": 0.0297, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 2.5576796308503624, |
|
"grad_norm": 0.3945279121398926, |
|
"learning_rate": 0.0001044254826052032, |
|
"loss": 0.025, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 2.564271588661833, |
|
"grad_norm": 0.5326240658760071, |
|
"learning_rate": 0.00010398971861825297, |
|
"loss": 0.0264, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 2.5708635464733027, |
|
"grad_norm": 0.3610016703605652, |
|
"learning_rate": 0.00010355387873607036, |
|
"loss": 0.0259, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.5774555042847727, |
|
"grad_norm": 0.3786564767360687, |
|
"learning_rate": 0.0001031179712495081, |
|
"loss": 0.0253, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 2.5840474620962426, |
|
"grad_norm": 0.5698022246360779, |
|
"learning_rate": 0.0001026820044507048, |
|
"loss": 0.021, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 2.5906394199077125, |
|
"grad_norm": 0.4795434772968292, |
|
"learning_rate": 0.00010224598663292737, |
|
"loss": 0.0267, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 2.5972313777191824, |
|
"grad_norm": 0.4011961817741394, |
|
"learning_rate": 0.00010180992609041325, |
|
"loss": 0.035, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 2.6038233355306524, |
|
"grad_norm": 0.5173267126083374, |
|
"learning_rate": 0.00010137383111821266, |
|
"loss": 0.0298, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.6104152933421227, |
|
"grad_norm": 0.47045668959617615, |
|
"learning_rate": 0.00010093771001203076, |
|
"loss": 0.0296, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 2.6170072511535927, |
|
"grad_norm": 0.5313148498535156, |
|
"learning_rate": 0.0001005015710680698, |
|
"loss": 0.026, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 2.6235992089650626, |
|
"grad_norm": 0.40992313623428345, |
|
"learning_rate": 0.00010006542258287139, |
|
"loss": 0.0213, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 2.6301911667765325, |
|
"grad_norm": 0.2713076174259186, |
|
"learning_rate": 9.96292728531586e-05, |
|
"loss": 0.0238, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 2.6367831245880025, |
|
"grad_norm": 0.41798898577690125, |
|
"learning_rate": 9.919313017567822e-05, |
|
"loss": 0.0269, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.643375082399473, |
|
"grad_norm": 0.26005855202674866, |
|
"learning_rate": 9.875700284704286e-05, |
|
"loss": 0.0262, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 2.6499670402109428, |
|
"grad_norm": 0.24366049468517303, |
|
"learning_rate": 9.83208991635732e-05, |
|
"loss": 0.0234, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 2.6565589980224127, |
|
"grad_norm": 0.424334317445755, |
|
"learning_rate": 9.788482742114003e-05, |
|
"loss": 0.0296, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 2.6631509558338826, |
|
"grad_norm": 0.3093094229698181, |
|
"learning_rate": 9.744879591500662e-05, |
|
"loss": 0.0282, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 2.6697429136453525, |
|
"grad_norm": 0.42985987663269043, |
|
"learning_rate": 9.701281293967083e-05, |
|
"loss": 0.031, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.676334871456823, |
|
"grad_norm": 0.3328607380390167, |
|
"learning_rate": 9.657688678870728e-05, |
|
"loss": 0.0318, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 2.682926829268293, |
|
"grad_norm": 0.35078462958335876, |
|
"learning_rate": 9.614102575460973e-05, |
|
"loss": 0.0268, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 2.6895187870797628, |
|
"grad_norm": 0.4191462993621826, |
|
"learning_rate": 9.57052381286331e-05, |
|
"loss": 0.03, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.6961107448912327, |
|
"grad_norm": 0.4283992648124695, |
|
"learning_rate": 9.526953220063603e-05, |
|
"loss": 0.0235, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 2.7027027027027026, |
|
"grad_norm": 0.35658934712409973, |
|
"learning_rate": 9.483391625892293e-05, |
|
"loss": 0.0243, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.7092946605141726, |
|
"grad_norm": 0.2613814175128937, |
|
"learning_rate": 9.439839859008653e-05, |
|
"loss": 0.0232, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 2.7158866183256425, |
|
"grad_norm": 0.24698810279369354, |
|
"learning_rate": 9.396298747885013e-05, |
|
"loss": 0.0232, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 2.722478576137113, |
|
"grad_norm": 0.25733861327171326, |
|
"learning_rate": 9.352769120790988e-05, |
|
"loss": 0.0231, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 2.729070533948583, |
|
"grad_norm": 0.288001149892807, |
|
"learning_rate": 9.309251805777754e-05, |
|
"loss": 0.0247, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 2.7356624917600527, |
|
"grad_norm": 0.47979527711868286, |
|
"learning_rate": 9.265747630662265e-05, |
|
"loss": 0.0315, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.7422544495715226, |
|
"grad_norm": 0.5932050943374634, |
|
"learning_rate": 9.22225742301153e-05, |
|
"loss": 0.0252, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 2.7488464073829926, |
|
"grad_norm": 0.3525910973548889, |
|
"learning_rate": 9.178782010126844e-05, |
|
"loss": 0.0249, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 2.755438365194463, |
|
"grad_norm": 0.27204054594039917, |
|
"learning_rate": 9.135322219028079e-05, |
|
"loss": 0.025, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 2.762030323005933, |
|
"grad_norm": 0.3478144407272339, |
|
"learning_rate": 9.091878876437933e-05, |
|
"loss": 0.0216, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 2.768622280817403, |
|
"grad_norm": 0.29393240809440613, |
|
"learning_rate": 9.04845280876621e-05, |
|
"loss": 0.0214, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.7752142386288727, |
|
"grad_norm": 0.21876759827136993, |
|
"learning_rate": 9.005044842094101e-05, |
|
"loss": 0.0245, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 2.7818061964403427, |
|
"grad_norm": 0.423742413520813, |
|
"learning_rate": 8.961655802158456e-05, |
|
"loss": 0.0241, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 2.788398154251813, |
|
"grad_norm": 0.38848140835762024, |
|
"learning_rate": 8.918286514336099e-05, |
|
"loss": 0.0238, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 2.794990112063283, |
|
"grad_norm": 0.28686466813087463, |
|
"learning_rate": 8.874937803628115e-05, |
|
"loss": 0.022, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 2.801582069874753, |
|
"grad_norm": 0.3457236588001251, |
|
"learning_rate": 8.831610494644148e-05, |
|
"loss": 0.0345, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.808174027686223, |
|
"grad_norm": 0.339136004447937, |
|
"learning_rate": 8.788305411586736e-05, |
|
"loss": 0.0194, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 2.8147659854976927, |
|
"grad_norm": 0.3297877907752991, |
|
"learning_rate": 8.745023378235602e-05, |
|
"loss": 0.0199, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 2.8213579433091627, |
|
"grad_norm": 0.39552271366119385, |
|
"learning_rate": 8.701765217932022e-05, |
|
"loss": 0.0266, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 2.8279499011206326, |
|
"grad_norm": 0.40580829977989197, |
|
"learning_rate": 8.658531753563122e-05, |
|
"loss": 0.0367, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 2.834541858932103, |
|
"grad_norm": 0.3342481553554535, |
|
"learning_rate": 8.615323807546258e-05, |
|
"loss": 0.0223, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.841133816743573, |
|
"grad_norm": 0.25729164481163025, |
|
"learning_rate": 8.572142201813363e-05, |
|
"loss": 0.023, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 2.847725774555043, |
|
"grad_norm": 0.3168254792690277, |
|
"learning_rate": 8.528987757795286e-05, |
|
"loss": 0.0237, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 2.8543177323665128, |
|
"grad_norm": 0.4179421365261078, |
|
"learning_rate": 8.485861296406207e-05, |
|
"loss": 0.0268, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 2.8609096901779827, |
|
"grad_norm": 0.46458080410957336, |
|
"learning_rate": 8.442763638027985e-05, |
|
"loss": 0.0216, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 2.867501647989453, |
|
"grad_norm": 0.35828524827957153, |
|
"learning_rate": 8.399695602494581e-05, |
|
"loss": 0.0204, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.874093605800923, |
|
"grad_norm": 0.34387773275375366, |
|
"learning_rate": 8.356658009076441e-05, |
|
"loss": 0.0239, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 2.880685563612393, |
|
"grad_norm": 0.3083021342754364, |
|
"learning_rate": 8.313651676464923e-05, |
|
"loss": 0.0228, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 2.887277521423863, |
|
"grad_norm": 0.2175825834274292, |
|
"learning_rate": 8.270677422756725e-05, |
|
"loss": 0.0201, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 2.8938694792353328, |
|
"grad_norm": 0.2774793803691864, |
|
"learning_rate": 8.227736065438302e-05, |
|
"loss": 0.0234, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 2.900461437046803, |
|
"grad_norm": 0.2598700523376465, |
|
"learning_rate": 8.184828421370348e-05, |
|
"loss": 0.0241, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.9070533948582726, |
|
"grad_norm": 0.3586549460887909, |
|
"learning_rate": 8.141955306772229e-05, |
|
"loss": 0.0162, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 2.913645352669743, |
|
"grad_norm": 0.26286324858665466, |
|
"learning_rate": 8.099117537206477e-05, |
|
"loss": 0.0212, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 2.920237310481213, |
|
"grad_norm": 0.4125373661518097, |
|
"learning_rate": 8.05631592756325e-05, |
|
"loss": 0.0202, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 2.926829268292683, |
|
"grad_norm": 0.29703447222709656, |
|
"learning_rate": 8.013551292044859e-05, |
|
"loss": 0.0213, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 2.933421226104153, |
|
"grad_norm": 0.3580416738986969, |
|
"learning_rate": 7.97082444415027e-05, |
|
"loss": 0.0226, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 2.9400131839156227, |
|
"grad_norm": 0.4119264781475067, |
|
"learning_rate": 7.928136196659614e-05, |
|
"loss": 0.0242, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 2.946605141727093, |
|
"grad_norm": 0.5699878931045532, |
|
"learning_rate": 7.885487361618754e-05, |
|
"loss": 0.0262, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 2.953197099538563, |
|
"grad_norm": 0.4126439094543457, |
|
"learning_rate": 7.842878750323801e-05, |
|
"loss": 0.021, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 2.959789057350033, |
|
"grad_norm": 0.42604967951774597, |
|
"learning_rate": 7.800311173305718e-05, |
|
"loss": 0.0219, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 2.966381015161503, |
|
"grad_norm": 0.19208472967147827, |
|
"learning_rate": 7.757785440314882e-05, |
|
"loss": 0.0284, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.972972972972973, |
|
"grad_norm": 0.43162015080451965, |
|
"learning_rate": 7.715302360305678e-05, |
|
"loss": 0.0192, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 2.979564930784443, |
|
"grad_norm": 0.7263951301574707, |
|
"learning_rate": 7.672862741421126e-05, |
|
"loss": 0.0299, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 2.986156888595913, |
|
"grad_norm": 0.3890402615070343, |
|
"learning_rate": 7.63046739097748e-05, |
|
"loss": 0.0222, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 2.992748846407383, |
|
"grad_norm": 0.25311848521232605, |
|
"learning_rate": 7.588117115448911e-05, |
|
"loss": 0.0208, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 2.999340804218853, |
|
"grad_norm": 0.33752700686454773, |
|
"learning_rate": 7.545812720452127e-05, |
|
"loss": 0.0263, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 3.005932762030323, |
|
"grad_norm": 0.2610788345336914, |
|
"learning_rate": 7.50355501073107e-05, |
|
"loss": 0.0246, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 3.012524719841793, |
|
"grad_norm": 0.32036837935447693, |
|
"learning_rate": 7.461344790141607e-05, |
|
"loss": 0.0283, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 3.019116677653263, |
|
"grad_norm": 0.4340413212776184, |
|
"learning_rate": 7.419182861636218e-05, |
|
"loss": 0.0293, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 3.025708635464733, |
|
"grad_norm": 0.39858514070510864, |
|
"learning_rate": 7.377070027248756e-05, |
|
"loss": 0.0186, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 3.032300593276203, |
|
"grad_norm": 0.26919031143188477, |
|
"learning_rate": 7.335007088079156e-05, |
|
"loss": 0.0208, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 3.038892551087673, |
|
"grad_norm": 0.4067997634410858, |
|
"learning_rate": 7.292994844278223e-05, |
|
"loss": 0.0261, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 3.045484508899143, |
|
"grad_norm": 0.4950489103794098, |
|
"learning_rate": 7.251034095032388e-05, |
|
"loss": 0.0292, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 3.052076466710613, |
|
"grad_norm": 0.2269221693277359, |
|
"learning_rate": 7.20912563854852e-05, |
|
"loss": 0.0175, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 3.058668424522083, |
|
"grad_norm": 0.32157209515571594, |
|
"learning_rate": 7.167270272038747e-05, |
|
"loss": 0.0187, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 3.065260382333553, |
|
"grad_norm": 0.2660551369190216, |
|
"learning_rate": 7.12546879170527e-05, |
|
"loss": 0.023, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 3.071852340145023, |
|
"grad_norm": 0.29758307337760925, |
|
"learning_rate": 7.08372199272524e-05, |
|
"loss": 0.0291, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 3.078444297956493, |
|
"grad_norm": 0.32291552424430847, |
|
"learning_rate": 7.042030669235606e-05, |
|
"loss": 0.0334, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 3.085036255767963, |
|
"grad_norm": 0.481623113155365, |
|
"learning_rate": 7.000395614318038e-05, |
|
"loss": 0.0192, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 3.0916282135794333, |
|
"grad_norm": 0.36292940378189087, |
|
"learning_rate": 6.958817619983822e-05, |
|
"loss": 0.0279, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 3.098220171390903, |
|
"grad_norm": 0.34903573989868164, |
|
"learning_rate": 6.917297477158792e-05, |
|
"loss": 0.0219, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 3.104812129202373, |
|
"grad_norm": 0.290768563747406, |
|
"learning_rate": 6.875835975668298e-05, |
|
"loss": 0.0245, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 3.111404087013843, |
|
"grad_norm": 0.4250969886779785, |
|
"learning_rate": 6.834433904222162e-05, |
|
"loss": 0.0239, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 3.117996044825313, |
|
"grad_norm": 0.31465357542037964, |
|
"learning_rate": 6.793092050399698e-05, |
|
"loss": 0.0227, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 3.124588002636783, |
|
"grad_norm": 0.46385765075683594, |
|
"learning_rate": 6.75181120063471e-05, |
|
"loss": 0.0271, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 3.1311799604482533, |
|
"grad_norm": 0.37862929701805115, |
|
"learning_rate": 6.710592140200542e-05, |
|
"loss": 0.0227, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 3.1377719182597232, |
|
"grad_norm": 0.49200916290283203, |
|
"learning_rate": 6.669435653195146e-05, |
|
"loss": 0.0201, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 3.144363876071193, |
|
"grad_norm": 0.4198756217956543, |
|
"learning_rate": 6.628342522526143e-05, |
|
"loss": 0.0216, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 3.150955833882663, |
|
"grad_norm": 0.5533847212791443, |
|
"learning_rate": 6.587313529895957e-05, |
|
"loss": 0.034, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 3.157547791694133, |
|
"grad_norm": 0.37719669938087463, |
|
"learning_rate": 6.546349455786926e-05, |
|
"loss": 0.0282, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 3.164139749505603, |
|
"grad_norm": 0.6606992483139038, |
|
"learning_rate": 6.505451079446467e-05, |
|
"loss": 0.0217, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 3.1707317073170733, |
|
"grad_norm": 0.20845943689346313, |
|
"learning_rate": 6.464619178872247e-05, |
|
"loss": 0.023, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 3.1773236651285433, |
|
"grad_norm": 0.23495689034461975, |
|
"learning_rate": 6.42385453079738e-05, |
|
"loss": 0.0256, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 3.183915622940013, |
|
"grad_norm": 0.1919371336698532, |
|
"learning_rate": 6.38315791067567e-05, |
|
"loss": 0.019, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 3.190507580751483, |
|
"grad_norm": 0.3485127091407776, |
|
"learning_rate": 6.342530092666821e-05, |
|
"loss": 0.0205, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 3.197099538562953, |
|
"grad_norm": 0.2419605702161789, |
|
"learning_rate": 6.301971849621757e-05, |
|
"loss": 0.0197, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 3.2036914963744234, |
|
"grad_norm": 0.23359638452529907, |
|
"learning_rate": 6.261483953067886e-05, |
|
"loss": 0.0215, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 3.2102834541858933, |
|
"grad_norm": 0.4236893355846405, |
|
"learning_rate": 6.221067173194442e-05, |
|
"loss": 0.0259, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 3.2168754119973633, |
|
"grad_norm": 0.35271692276000977, |
|
"learning_rate": 6.180722278837825e-05, |
|
"loss": 0.0229, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 3.223467369808833, |
|
"grad_norm": 0.5368591547012329, |
|
"learning_rate": 6.140450037466974e-05, |
|
"loss": 0.0227, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 3.230059327620303, |
|
"grad_norm": 0.3813161849975586, |
|
"learning_rate": 6.1002512151687796e-05, |
|
"loss": 0.0175, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 3.236651285431773, |
|
"grad_norm": 0.40781912207603455, |
|
"learning_rate": 6.060126576633497e-05, |
|
"loss": 0.0278, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 3.2432432432432434, |
|
"grad_norm": 0.3028331398963928, |
|
"learning_rate": 6.0200768851402133e-05, |
|
"loss": 0.0212, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 3.2498352010547134, |
|
"grad_norm": 0.20801442861557007, |
|
"learning_rate": 5.980102902542306e-05, |
|
"loss": 0.0244, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 3.2564271588661833, |
|
"grad_norm": 0.3236633241176605, |
|
"learning_rate": 5.9402053892529794e-05, |
|
"loss": 0.023, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 3.263019116677653, |
|
"grad_norm": 0.3075791895389557, |
|
"learning_rate": 5.9003851042307804e-05, |
|
"loss": 0.0193, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 3.269611074489123, |
|
"grad_norm": 0.33486539125442505, |
|
"learning_rate": 5.86064280496516e-05, |
|
"loss": 0.0212, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 3.276203032300593, |
|
"grad_norm": 0.4018231928348541, |
|
"learning_rate": 5.8209792474620815e-05, |
|
"loss": 0.0215, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 3.2827949901120634, |
|
"grad_norm": 0.35829004645347595, |
|
"learning_rate": 5.78139518622961e-05, |
|
"loss": 0.0228, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 3.2893869479235334, |
|
"grad_norm": 0.2682739496231079, |
|
"learning_rate": 5.741891374263593e-05, |
|
"loss": 0.0255, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 3.2959789057350033, |
|
"grad_norm": 0.3929627537727356, |
|
"learning_rate": 5.702468563033306e-05, |
|
"loss": 0.0228, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.3025708635464732, |
|
"grad_norm": 0.2807949483394623, |
|
"learning_rate": 5.663127502467184e-05, |
|
"loss": 0.0207, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 3.309162821357943, |
|
"grad_norm": 0.33235079050064087, |
|
"learning_rate": 5.6238689409385346e-05, |
|
"loss": 0.0243, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 3.3157547791694135, |
|
"grad_norm": 0.28995218873023987, |
|
"learning_rate": 5.5846936252513174e-05, |
|
"loss": 0.017, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 3.3223467369808835, |
|
"grad_norm": 0.2601809799671173, |
|
"learning_rate": 5.54560230062593e-05, |
|
"loss": 0.0166, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 3.3289386947923534, |
|
"grad_norm": 0.3650406301021576, |
|
"learning_rate": 5.5065957106850204e-05, |
|
"loss": 0.021, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 3.3355306526038233, |
|
"grad_norm": 0.48497456312179565, |
|
"learning_rate": 5.4676745974393764e-05, |
|
"loss": 0.0173, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 3.3421226104152932, |
|
"grad_norm": 0.3954178988933563, |
|
"learning_rate": 5.4288397012737646e-05, |
|
"loss": 0.02, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 3.348714568226763, |
|
"grad_norm": 0.21555176377296448, |
|
"learning_rate": 5.390091760932887e-05, |
|
"loss": 0.0208, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 3.3553065260382335, |
|
"grad_norm": 0.4477789103984833, |
|
"learning_rate": 5.3514315135073076e-05, |
|
"loss": 0.023, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 3.3618984838497035, |
|
"grad_norm": 0.4595910906791687, |
|
"learning_rate": 5.3128596944194234e-05, |
|
"loss": 0.027, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 3.3684904416611734, |
|
"grad_norm": 0.3426424264907837, |
|
"learning_rate": 5.274377037409497e-05, |
|
"loss": 0.0224, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 3.3750823994726433, |
|
"grad_norm": 0.2647363841533661, |
|
"learning_rate": 5.235984274521684e-05, |
|
"loss": 0.0238, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 3.3816743572841133, |
|
"grad_norm": 0.21992464363574982, |
|
"learning_rate": 5.197682136090107e-05, |
|
"loss": 0.0163, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 3.388266315095583, |
|
"grad_norm": 0.6907774209976196, |
|
"learning_rate": 5.159471350724978e-05, |
|
"loss": 0.0223, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 3.3948582729070536, |
|
"grad_norm": 0.44378501176834106, |
|
"learning_rate": 5.121352645298708e-05, |
|
"loss": 0.0245, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 3.4014502307185235, |
|
"grad_norm": 0.25844740867614746, |
|
"learning_rate": 5.083326744932117e-05, |
|
"loss": 0.0211, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 3.4080421885299934, |
|
"grad_norm": 0.3211382031440735, |
|
"learning_rate": 5.0453943729806094e-05, |
|
"loss": 0.0207, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 3.4146341463414633, |
|
"grad_norm": 0.25202128291130066, |
|
"learning_rate": 5.007556251020434e-05, |
|
"loss": 0.0215, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 3.4212261041529333, |
|
"grad_norm": 0.3003428876399994, |
|
"learning_rate": 4.9698130988349424e-05, |
|
"loss": 0.0207, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 3.4278180619644036, |
|
"grad_norm": 0.32026761770248413, |
|
"learning_rate": 4.9321656344009115e-05, |
|
"loss": 0.0196, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 3.4344100197758736, |
|
"grad_norm": 0.26623809337615967, |
|
"learning_rate": 4.894614573874877e-05, |
|
"loss": 0.0219, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 3.4410019775873435, |
|
"grad_norm": 0.35238540172576904, |
|
"learning_rate": 4.857160631579509e-05, |
|
"loss": 0.0152, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 3.4475939353988134, |
|
"grad_norm": 0.3443749248981476, |
|
"learning_rate": 4.819804519990033e-05, |
|
"loss": 0.0232, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 3.4541858932102834, |
|
"grad_norm": 0.35800328850746155, |
|
"learning_rate": 4.782546949720658e-05, |
|
"loss": 0.0217, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 3.4607778510217533, |
|
"grad_norm": 0.37850216031074524, |
|
"learning_rate": 4.745388629511084e-05, |
|
"loss": 0.0167, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 3.4673698088332237, |
|
"grad_norm": 0.24581514298915863, |
|
"learning_rate": 4.708330266212993e-05, |
|
"loss": 0.0179, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 3.4739617666446936, |
|
"grad_norm": 0.16642197966575623, |
|
"learning_rate": 4.671372564776629e-05, |
|
"loss": 0.0169, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 3.4805537244561635, |
|
"grad_norm": 0.32910865545272827, |
|
"learning_rate": 4.634516228237372e-05, |
|
"loss": 0.019, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 3.4871456822676334, |
|
"grad_norm": 0.21662920713424683, |
|
"learning_rate": 4.59776195770236e-05, |
|
"loss": 0.0162, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 3.4937376400791034, |
|
"grad_norm": 0.3485572934150696, |
|
"learning_rate": 4.561110452337171e-05, |
|
"loss": 0.0217, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 3.5003295978905733, |
|
"grad_norm": 0.20581798255443573, |
|
"learning_rate": 4.5245624093525e-05, |
|
"loss": 0.0296, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 3.5069215557020437, |
|
"grad_norm": 0.35009968280792236, |
|
"learning_rate": 4.488118523990915e-05, |
|
"loss": 0.0208, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 3.5135135135135136, |
|
"grad_norm": 0.39382439851760864, |
|
"learning_rate": 4.451779489513628e-05, |
|
"loss": 0.0217, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 3.5201054713249835, |
|
"grad_norm": 0.348563551902771, |
|
"learning_rate": 4.415545997187296e-05, |
|
"loss": 0.0165, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 3.5266974291364535, |
|
"grad_norm": 0.494354784488678, |
|
"learning_rate": 4.379418736270886e-05, |
|
"loss": 0.0232, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 3.5332893869479234, |
|
"grad_norm": 0.1578008085489273, |
|
"learning_rate": 4.343398394002547e-05, |
|
"loss": 0.0226, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 3.5398813447593938, |
|
"grad_norm": 0.3410768210887909, |
|
"learning_rate": 4.307485655586557e-05, |
|
"loss": 0.0219, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 3.5464733025708637, |
|
"grad_norm": 0.20960773527622223, |
|
"learning_rate": 4.271681204180268e-05, |
|
"loss": 0.0209, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 3.5530652603823336, |
|
"grad_norm": 0.22281195223331451, |
|
"learning_rate": 4.2359857208811284e-05, |
|
"loss": 0.0233, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 3.5596572181938035, |
|
"grad_norm": 0.3393511474132538, |
|
"learning_rate": 4.2003998847137174e-05, |
|
"loss": 0.0209, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 3.5662491760052735, |
|
"grad_norm": 0.6712432503700256, |
|
"learning_rate": 4.164924372616821e-05, |
|
"loss": 0.0249, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 3.572841133816744, |
|
"grad_norm": 0.18807201087474823, |
|
"learning_rate": 4.129559859430573e-05, |
|
"loss": 0.024, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 3.5794330916282133, |
|
"grad_norm": 0.4251366853713989, |
|
"learning_rate": 4.094307017883606e-05, |
|
"loss": 0.0174, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 3.5860250494396837, |
|
"grad_norm": 0.2247576266527176, |
|
"learning_rate": 4.0591665185802576e-05, |
|
"loss": 0.0214, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 3.5926170072511536, |
|
"grad_norm": 0.643822968006134, |
|
"learning_rate": 4.0241390299878e-05, |
|
"loss": 0.0222, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 3.5992089650626236, |
|
"grad_norm": 0.37506723403930664, |
|
"learning_rate": 3.989225218423753e-05, |
|
"loss": 0.0147, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 3.6058009228740935, |
|
"grad_norm": 0.3052820861339569, |
|
"learning_rate": 3.954425748043186e-05, |
|
"loss": 0.0191, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 3.6123928806855634, |
|
"grad_norm": 0.3424012362957001, |
|
"learning_rate": 3.9197412808260805e-05, |
|
"loss": 0.0214, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 3.618984838497034, |
|
"grad_norm": 0.24967588484287262, |
|
"learning_rate": 3.885172476564765e-05, |
|
"loss": 0.0157, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 3.6255767963085037, |
|
"grad_norm": 0.2771139442920685, |
|
"learning_rate": 3.850719992851326e-05, |
|
"loss": 0.0198, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.6321687541199736, |
|
"grad_norm": 0.3275032043457031, |
|
"learning_rate": 3.8163844850651346e-05, |
|
"loss": 0.0204, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 3.6387607119314436, |
|
"grad_norm": 0.3696538507938385, |
|
"learning_rate": 3.7821666063603566e-05, |
|
"loss": 0.0172, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 3.6453526697429135, |
|
"grad_norm": 0.43786558508872986, |
|
"learning_rate": 3.748067007653536e-05, |
|
"loss": 0.0199, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 3.651944627554384, |
|
"grad_norm": 0.15298739075660706, |
|
"learning_rate": 3.714086337611217e-05, |
|
"loss": 0.0118, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 3.658536585365854, |
|
"grad_norm": 0.2643417716026306, |
|
"learning_rate": 3.680225242637583e-05, |
|
"loss": 0.0217, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 3.6651285431773237, |
|
"grad_norm": 0.29987242817878723, |
|
"learning_rate": 3.646484366862197e-05, |
|
"loss": 0.0218, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 3.6717205009887937, |
|
"grad_norm": 0.2553282678127289, |
|
"learning_rate": 3.6128643521277096e-05, |
|
"loss": 0.0192, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 3.6783124588002636, |
|
"grad_norm": 0.24411100149154663, |
|
"learning_rate": 3.57936583797768e-05, |
|
"loss": 0.0156, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 3.684904416611734, |
|
"grad_norm": 0.2638270854949951, |
|
"learning_rate": 3.5459894616443954e-05, |
|
"loss": 0.0188, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 3.6914963744232034, |
|
"grad_norm": 0.19742664694786072, |
|
"learning_rate": 3.5127358580367463e-05, |
|
"loss": 0.021, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 3.698088332234674, |
|
"grad_norm": 0.3131982386112213, |
|
"learning_rate": 3.479605659728159e-05, |
|
"loss": 0.0176, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 3.7046802900461437, |
|
"grad_norm": 0.24199941754341125, |
|
"learning_rate": 3.446599496944557e-05, |
|
"loss": 0.0178, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 3.7112722478576137, |
|
"grad_norm": 0.18790839612483978, |
|
"learning_rate": 3.413717997552376e-05, |
|
"loss": 0.012, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 3.7178642056690836, |
|
"grad_norm": 0.4031229317188263, |
|
"learning_rate": 3.380961787046605e-05, |
|
"loss": 0.022, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 3.7244561634805535, |
|
"grad_norm": 0.3094145357608795, |
|
"learning_rate": 3.348331488538913e-05, |
|
"loss": 0.0207, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 3.731048121292024, |
|
"grad_norm": 0.31893035769462585, |
|
"learning_rate": 3.315827722745779e-05, |
|
"loss": 0.0195, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 3.737640079103494, |
|
"grad_norm": 0.2687014639377594, |
|
"learning_rate": 3.28345110797668e-05, |
|
"loss": 0.0152, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 3.7442320369149638, |
|
"grad_norm": 0.3952026963233948, |
|
"learning_rate": 3.2512022601223515e-05, |
|
"loss": 0.0247, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 3.7508239947264337, |
|
"grad_norm": 0.25332149863243103, |
|
"learning_rate": 3.21908179264304e-05, |
|
"loss": 0.0142, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 3.7574159525379036, |
|
"grad_norm": 0.4335060119628906, |
|
"learning_rate": 3.187090316556861e-05, |
|
"loss": 0.0202, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 3.764007910349374, |
|
"grad_norm": 0.25930336117744446, |
|
"learning_rate": 3.155228440428164e-05, |
|
"loss": 0.0208, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 3.770599868160844, |
|
"grad_norm": 0.6695492267608643, |
|
"learning_rate": 3.123496770355956e-05, |
|
"loss": 0.0153, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 3.777191825972314, |
|
"grad_norm": 0.3357510566711426, |
|
"learning_rate": 3.091895909962375e-05, |
|
"loss": 0.021, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 3.7837837837837838, |
|
"grad_norm": 0.4220266342163086, |
|
"learning_rate": 3.060426460381195e-05, |
|
"loss": 0.0155, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 3.7903757415952537, |
|
"grad_norm": 0.2396579086780548, |
|
"learning_rate": 3.0290890202464182e-05, |
|
"loss": 0.017, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 3.796967699406724, |
|
"grad_norm": 0.4336076080799103, |
|
"learning_rate": 2.9978841856808525e-05, |
|
"loss": 0.0193, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 3.8035596572181936, |
|
"grad_norm": 0.4535181224346161, |
|
"learning_rate": 2.966812550284803e-05, |
|
"loss": 0.0151, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 3.810151615029664, |
|
"grad_norm": 0.2847338020801544, |
|
"learning_rate": 2.9358747051247637e-05, |
|
"loss": 0.0164, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 3.816743572841134, |
|
"grad_norm": 0.33757925033569336, |
|
"learning_rate": 2.905071238722169e-05, |
|
"loss": 0.0173, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 3.823335530652604, |
|
"grad_norm": 0.21222251653671265, |
|
"learning_rate": 2.8744027370422167e-05, |
|
"loss": 0.0186, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 3.8299274884640737, |
|
"grad_norm": 0.8053876757621765, |
|
"learning_rate": 2.843869783482701e-05, |
|
"loss": 0.0189, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 3.8365194462755436, |
|
"grad_norm": 0.2711152732372284, |
|
"learning_rate": 2.8134729588629303e-05, |
|
"loss": 0.0281, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 3.843111404087014, |
|
"grad_norm": 0.24810029566287994, |
|
"learning_rate": 2.7832128414126735e-05, |
|
"loss": 0.0169, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 3.849703361898484, |
|
"grad_norm": 0.3628500998020172, |
|
"learning_rate": 2.7530900067611577e-05, |
|
"loss": 0.0138, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 3.856295319709954, |
|
"grad_norm": 0.1820344775915146, |
|
"learning_rate": 2.7231050279261217e-05, |
|
"loss": 0.0201, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 3.862887277521424, |
|
"grad_norm": 0.5230331420898438, |
|
"learning_rate": 2.6932584753029068e-05, |
|
"loss": 0.0162, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 3.8694792353328937, |
|
"grad_norm": 0.27183738350868225, |
|
"learning_rate": 2.6635509166536243e-05, |
|
"loss": 0.0173, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 3.876071193144364, |
|
"grad_norm": 0.19195932149887085, |
|
"learning_rate": 2.633982917096335e-05, |
|
"loss": 0.0207, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 3.882663150955834, |
|
"grad_norm": 0.42282554507255554, |
|
"learning_rate": 2.6045550390943185e-05, |
|
"loss": 0.0159, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 3.889255108767304, |
|
"grad_norm": 0.2981650233268738, |
|
"learning_rate": 2.5752678424453514e-05, |
|
"loss": 0.0173, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 3.895847066578774, |
|
"grad_norm": 0.32203352451324463, |
|
"learning_rate": 2.5461218842710798e-05, |
|
"loss": 0.021, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 3.902439024390244, |
|
"grad_norm": 0.2388588786125183, |
|
"learning_rate": 2.517117719006411e-05, |
|
"loss": 0.0219, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 3.9090309822017137, |
|
"grad_norm": 0.40328285098075867, |
|
"learning_rate": 2.488255898388966e-05, |
|
"loss": 0.0169, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 3.9156229400131837, |
|
"grad_norm": 0.14190708100795746, |
|
"learning_rate": 2.4595369714485895e-05, |
|
"loss": 0.0167, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 3.922214897824654, |
|
"grad_norm": 0.418643593788147, |
|
"learning_rate": 2.430961484496893e-05, |
|
"loss": 0.0187, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 3.928806855636124, |
|
"grad_norm": 0.2280479073524475, |
|
"learning_rate": 2.4025299811168843e-05, |
|
"loss": 0.0151, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 3.935398813447594, |
|
"grad_norm": 0.5002431869506836, |
|
"learning_rate": 2.3742430021526018e-05, |
|
"loss": 0.019, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 3.941990771259064, |
|
"grad_norm": 0.22551734745502472, |
|
"learning_rate": 2.3461010856988473e-05, |
|
"loss": 0.013, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 3.9485827290705338, |
|
"grad_norm": 0.3069497048854828, |
|
"learning_rate": 2.318104767090944e-05, |
|
"loss": 0.018, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 3.955174686882004, |
|
"grad_norm": 0.36286690831184387, |
|
"learning_rate": 2.2902545788945396e-05, |
|
"loss": 0.024, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.961766644693474, |
|
"grad_norm": 0.2421414703130722, |
|
"learning_rate": 2.2625510508954952e-05, |
|
"loss": 0.0212, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 3.968358602504944, |
|
"grad_norm": 0.23019398748874664, |
|
"learning_rate": 2.234994710089795e-05, |
|
"loss": 0.0188, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 3.974950560316414, |
|
"grad_norm": 0.2802564203739166, |
|
"learning_rate": 2.207586080673528e-05, |
|
"loss": 0.0192, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 3.981542518127884, |
|
"grad_norm": 0.2667250633239746, |
|
"learning_rate": 2.1803256840329134e-05, |
|
"loss": 0.0213, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 3.988134475939354, |
|
"grad_norm": 0.4056625962257385, |
|
"learning_rate": 2.1532140387343735e-05, |
|
"loss": 0.0169, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 3.994726433750824, |
|
"grad_norm": 0.1790419965982437, |
|
"learning_rate": 2.126251660514691e-05, |
|
"loss": 0.0185, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 4.001318391562294, |
|
"grad_norm": 0.2861385941505432, |
|
"learning_rate": 2.0994390622711734e-05, |
|
"loss": 0.0191, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 4.0079103493737644, |
|
"grad_norm": 0.20970335602760315, |
|
"learning_rate": 2.0727767540519193e-05, |
|
"loss": 0.0171, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 4.014502307185234, |
|
"grad_norm": 0.2126467227935791, |
|
"learning_rate": 2.046265243046094e-05, |
|
"loss": 0.0175, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 4.021094264996704, |
|
"grad_norm": 0.4862785339355469, |
|
"learning_rate": 2.0199050335743007e-05, |
|
"loss": 0.0212, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 4.027686222808174, |
|
"grad_norm": 0.36454570293426514, |
|
"learning_rate": 1.9936966270789738e-05, |
|
"loss": 0.0159, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 4.034278180619644, |
|
"grad_norm": 0.1897134780883789, |
|
"learning_rate": 1.9676405221148475e-05, |
|
"loss": 0.0172, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 4.040870138431114, |
|
"grad_norm": 0.2542422115802765, |
|
"learning_rate": 1.9417372143394697e-05, |
|
"loss": 0.0251, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 4.047462096242584, |
|
"grad_norm": 0.20512335002422333, |
|
"learning_rate": 1.9159871965037657e-05, |
|
"loss": 0.0172, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 4.054054054054054, |
|
"grad_norm": 0.21565409004688263, |
|
"learning_rate": 1.8903909584426826e-05, |
|
"loss": 0.018, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 4.060646011865524, |
|
"grad_norm": 0.3546988368034363, |
|
"learning_rate": 1.86494898706585e-05, |
|
"loss": 0.0169, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 4.067237969676994, |
|
"grad_norm": 0.5294975638389587, |
|
"learning_rate": 1.8396617663483363e-05, |
|
"loss": 0.0159, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 4.073829927488464, |
|
"grad_norm": 0.2470693439245224, |
|
"learning_rate": 1.814529777321432e-05, |
|
"loss": 0.0211, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 4.080421885299934, |
|
"grad_norm": 0.4331272542476654, |
|
"learning_rate": 1.7895534980634954e-05, |
|
"loss": 0.0176, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 4.0870138431114045, |
|
"grad_norm": 0.3057391941547394, |
|
"learning_rate": 1.764733403690875e-05, |
|
"loss": 0.0203, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 4.093605800922874, |
|
"grad_norm": 0.11541125923395157, |
|
"learning_rate": 1.740069966348846e-05, |
|
"loss": 0.0193, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 4.100197758734344, |
|
"grad_norm": 0.28473731875419617, |
|
"learning_rate": 1.71556365520266e-05, |
|
"loss": 0.0196, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 4.106789716545814, |
|
"grad_norm": 0.14990141987800598, |
|
"learning_rate": 1.6912149364285958e-05, |
|
"loss": 0.0147, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 4.113381674357284, |
|
"grad_norm": 0.33358579874038696, |
|
"learning_rate": 1.667024273205092e-05, |
|
"loss": 0.02, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 4.119973632168755, |
|
"grad_norm": 0.2164691537618637, |
|
"learning_rate": 1.6429921257039592e-05, |
|
"loss": 0.0171, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 4.126565589980224, |
|
"grad_norm": 0.29503509402275085, |
|
"learning_rate": 1.619118951081594e-05, |
|
"loss": 0.0156, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 4.133157547791694, |
|
"grad_norm": 0.29893797636032104, |
|
"learning_rate": 1.5954052034703125e-05, |
|
"loss": 0.016, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 4.139749505603164, |
|
"grad_norm": 0.3970952033996582, |
|
"learning_rate": 1.5718513339696883e-05, |
|
"loss": 0.0191, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 4.146341463414634, |
|
"grad_norm": 0.2718060612678528, |
|
"learning_rate": 1.548457790637987e-05, |
|
"loss": 0.014, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 4.152933421226104, |
|
"grad_norm": 0.3720945119857788, |
|
"learning_rate": 1.525225018483638e-05, |
|
"loss": 0.0168, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 4.159525379037574, |
|
"grad_norm": 0.21513940393924713, |
|
"learning_rate": 1.5021534594567621e-05, |
|
"loss": 0.0159, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 4.1661173368490445, |
|
"grad_norm": 0.30618909001350403, |
|
"learning_rate": 1.4792435524407755e-05, |
|
"loss": 0.0151, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 4.172709294660514, |
|
"grad_norm": 0.409757524728775, |
|
"learning_rate": 1.4564957332440365e-05, |
|
"loss": 0.0177, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 4.179301252471984, |
|
"grad_norm": 0.2687203884124756, |
|
"learning_rate": 1.4339104345915554e-05, |
|
"loss": 0.0202, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 4.185893210283454, |
|
"grad_norm": 0.25398269295692444, |
|
"learning_rate": 1.4114880861167557e-05, |
|
"loss": 0.0189, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 4.192485168094924, |
|
"grad_norm": 0.2254013866186142, |
|
"learning_rate": 1.3892291143533154e-05, |
|
"loss": 0.0144, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 4.199077125906395, |
|
"grad_norm": 0.32205384969711304, |
|
"learning_rate": 1.3671339427270458e-05, |
|
"loss": 0.0161, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 4.205669083717864, |
|
"grad_norm": 0.3406763970851898, |
|
"learning_rate": 1.3452029915478304e-05, |
|
"loss": 0.02, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 4.2122610415293344, |
|
"grad_norm": 0.31815874576568604, |
|
"learning_rate": 1.3234366780016438e-05, |
|
"loss": 0.0185, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 4.218852999340804, |
|
"grad_norm": 0.1224733293056488, |
|
"learning_rate": 1.3018354161425994e-05, |
|
"loss": 0.0181, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 4.225444957152274, |
|
"grad_norm": 0.42326441407203674, |
|
"learning_rate": 1.2803996168850896e-05, |
|
"loss": 0.016, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 4.232036914963745, |
|
"grad_norm": 0.2917204797267914, |
|
"learning_rate": 1.2591296879959557e-05, |
|
"loss": 0.0146, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 4.238628872775214, |
|
"grad_norm": 0.27973493933677673, |
|
"learning_rate": 1.238026034086739e-05, |
|
"loss": 0.0167, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 4.2452208305866845, |
|
"grad_norm": 0.13871712982654572, |
|
"learning_rate": 1.2170890566059811e-05, |
|
"loss": 0.0161, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 4.251812788398154, |
|
"grad_norm": 0.2724437713623047, |
|
"learning_rate": 1.1963191538315833e-05, |
|
"loss": 0.0188, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 4.258404746209624, |
|
"grad_norm": 0.24582289159297943, |
|
"learning_rate": 1.1757167208632414e-05, |
|
"loss": 0.0142, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 4.264996704021094, |
|
"grad_norm": 0.6128583550453186, |
|
"learning_rate": 1.1552821496149135e-05, |
|
"loss": 0.015, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 4.271588661832564, |
|
"grad_norm": 0.38243502378463745, |
|
"learning_rate": 1.135015828807382e-05, |
|
"loss": 0.0135, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 4.278180619644035, |
|
"grad_norm": 0.22540901601314545, |
|
"learning_rate": 1.1149181439608514e-05, |
|
"loss": 0.0156, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 4.284772577455504, |
|
"grad_norm": 0.4100974500179291, |
|
"learning_rate": 1.0949894773876079e-05, |
|
"loss": 0.0156, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 4.2913645352669745, |
|
"grad_norm": 0.1929452121257782, |
|
"learning_rate": 1.0752302081847565e-05, |
|
"loss": 0.0184, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 4.297956493078444, |
|
"grad_norm": 0.27612316608428955, |
|
"learning_rate": 1.0556407122270096e-05, |
|
"loss": 0.0192, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 4.304548450889914, |
|
"grad_norm": 0.20837433636188507, |
|
"learning_rate": 1.0362213621595307e-05, |
|
"loss": 0.0135, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 4.311140408701385, |
|
"grad_norm": 0.38383790850639343, |
|
"learning_rate": 1.016972527390846e-05, |
|
"loss": 0.0186, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 4.317732366512854, |
|
"grad_norm": 0.3808279037475586, |
|
"learning_rate": 9.978945740858226e-06, |
|
"loss": 0.0172, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 4.324324324324325, |
|
"grad_norm": 0.12612776458263397, |
|
"learning_rate": 9.789878651587036e-06, |
|
"loss": 0.0131, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 4.330916282135794, |
|
"grad_norm": 0.47806084156036377, |
|
"learning_rate": 9.602527602661949e-06, |
|
"loss": 0.0175, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 4.337508239947264, |
|
"grad_norm": 0.5602189302444458, |
|
"learning_rate": 9.416896158006328e-06, |
|
"loss": 0.0161, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 4.344100197758735, |
|
"grad_norm": 0.5258492231369019, |
|
"learning_rate": 9.232987848832009e-06, |
|
"loss": 0.0151, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 4.350692155570204, |
|
"grad_norm": 0.18115440011024475, |
|
"learning_rate": 9.050806173572134e-06, |
|
"loss": 0.0115, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 4.357284113381675, |
|
"grad_norm": 0.2673959732055664, |
|
"learning_rate": 8.870354597814622e-06, |
|
"loss": 0.013, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 4.363876071193144, |
|
"grad_norm": 0.4614759385585785, |
|
"learning_rate": 8.691636554236182e-06, |
|
"loss": 0.0179, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 4.3704680290046145, |
|
"grad_norm": 0.31257471442222595, |
|
"learning_rate": 8.514655442537122e-06, |
|
"loss": 0.0152, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 4.377059986816084, |
|
"grad_norm": 0.1402910202741623, |
|
"learning_rate": 8.339414629376507e-06, |
|
"loss": 0.0155, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 4.383651944627554, |
|
"grad_norm": 0.19149114191532135, |
|
"learning_rate": 8.165917448308324e-06, |
|
"loss": 0.0132, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 4.390243902439025, |
|
"grad_norm": 0.31132665276527405, |
|
"learning_rate": 7.994167199717894e-06, |
|
"loss": 0.0159, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 4.396835860250494, |
|
"grad_norm": 0.30715203285217285, |
|
"learning_rate": 7.824167150759188e-06, |
|
"loss": 0.022, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 4.403427818061965, |
|
"grad_norm": 0.23801127076148987, |
|
"learning_rate": 7.655920535292682e-06, |
|
"loss": 0.0123, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 4.410019775873434, |
|
"grad_norm": 0.3437555730342865, |
|
"learning_rate": 7.4894305538237285e-06, |
|
"loss": 0.0154, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 4.4166117336849045, |
|
"grad_norm": 0.23300838470458984, |
|
"learning_rate": 7.324700373441828e-06, |
|
"loss": 0.0188, |
|
"step": 6700 |
    },
    {
      "epoch": 4.423203691496375,
      "grad_norm": 0.2827889621257782,
      "learning_rate": 7.161733127760228e-06,
      "loss": 0.0151,
      "step": 6710
    },
    {
      "epoch": 4.429795649307844,
      "grad_norm": 0.2165522575378418,
      "learning_rate": 7.000531916856512e-06,
      "loss": 0.0145,
      "step": 6720
    },
    {
      "epoch": 4.436387607119315,
      "grad_norm": 0.3993603587150574,
      "learning_rate": 6.841099807213392e-06,
      "loss": 0.024,
      "step": 6730
    },
    {
      "epoch": 4.442979564930784,
      "grad_norm": 0.21347716450691223,
      "learning_rate": 6.683439831660554e-06,
      "loss": 0.0254,
      "step": 6740
    },
    {
      "epoch": 4.4495715227422545,
      "grad_norm": 0.4783138036727905,
      "learning_rate": 6.527554989316897e-06,
      "loss": 0.0141,
      "step": 6750
    },
    {
      "epoch": 4.456163480553725,
      "grad_norm": 0.2551850378513336,
      "learning_rate": 6.373448245533464e-06,
      "loss": 0.0203,
      "step": 6760
    },
    {
      "epoch": 4.462755438365194,
      "grad_norm": 0.22933778166770935,
      "learning_rate": 6.221122531837076e-06,
      "loss": 0.0193,
      "step": 6770
    },
    {
      "epoch": 4.469347396176665,
      "grad_norm": 0.1832355260848999,
      "learning_rate": 6.070580745874544e-06,
      "loss": 0.0134,
      "step": 6780
    },
    {
      "epoch": 4.475939353988134,
      "grad_norm": 0.3792283535003662,
      "learning_rate": 5.921825751357557e-06,
      "loss": 0.0159,
      "step": 6790
    },
    {
      "epoch": 4.482531311799605,
      "grad_norm": 0.18225885927677155,
      "learning_rate": 5.7748603780081735e-06,
      "loss": 0.0217,
      "step": 6800
    },
    {
      "epoch": 4.489123269611074,
      "grad_norm": 0.49436914920806885,
      "learning_rate": 5.62968742150507e-06,
      "loss": 0.0158,
      "step": 6810
    },
    {
      "epoch": 4.4957152274225445,
      "grad_norm": 0.2793099582195282,
      "learning_rate": 5.4863096434302655e-06,
      "loss": 0.016,
      "step": 6820
    },
    {
      "epoch": 4.502307185234015,
      "grad_norm": 0.2998494505882263,
      "learning_rate": 5.344729771216661e-06,
      "loss": 0.0174,
      "step": 6830
    },
    {
      "epoch": 4.508899143045484,
      "grad_norm": 0.45131003856658936,
      "learning_rate": 5.204950498096117e-06,
      "loss": 0.0196,
      "step": 6840
    },
    {
      "epoch": 4.515491100856955,
      "grad_norm": 0.37397655844688416,
      "learning_rate": 5.066974483048215e-06,
      "loss": 0.0158,
      "step": 6850
    },
    {
      "epoch": 4.522083058668424,
      "grad_norm": 0.5381725430488586,
      "learning_rate": 4.930804350749729e-06,
      "loss": 0.016,
      "step": 6860
    },
    {
      "epoch": 4.528675016479895,
      "grad_norm": 0.2811379134654999,
      "learning_rate": 4.796442691524638e-06,
      "loss": 0.013,
      "step": 6870
    },
    {
      "epoch": 4.535266974291364,
      "grad_norm": 0.205452561378479,
      "learning_rate": 4.663892061294872e-06,
      "loss": 0.0165,
      "step": 6880
    },
    {
      "epoch": 4.541858932102834,
      "grad_norm": 0.2746995687484741,
      "learning_rate": 4.5331549815317174e-06,
      "loss": 0.0227,
      "step": 6890
    },
    {
      "epoch": 4.548450889914305,
      "grad_norm": 0.30904215574264526,
      "learning_rate": 4.404233939207791e-06,
      "loss": 0.0153,
      "step": 6900
    },
    {
      "epoch": 4.555042847725774,
      "grad_norm": 0.42725998163223267,
      "learning_rate": 4.2771313867498e-06,
      "loss": 0.0192,
      "step": 6910
    },
    {
      "epoch": 4.561634805537245,
      "grad_norm": 0.18472789227962494,
      "learning_rate": 4.151849741991864e-06,
      "loss": 0.025,
      "step": 6920
    },
    {
      "epoch": 4.568226763348715,
      "grad_norm": 0.3807401955127716,
      "learning_rate": 4.0283913881294935e-06,
      "loss": 0.0181,
      "step": 6930
    },
    {
      "epoch": 4.5748187211601845,
      "grad_norm": 0.17289142310619354,
      "learning_rate": 3.906758673674293e-06,
      "loss": 0.0148,
      "step": 6940
    },
    {
      "epoch": 4.581410678971655,
      "grad_norm": 0.32773271203041077,
      "learning_rate": 3.7869539124092525e-06,
      "loss": 0.0173,
      "step": 6950
    },
    {
      "epoch": 4.588002636783124,
      "grad_norm": 0.2213710993528366,
      "learning_rate": 3.6689793833447837e-06,
      "loss": 0.0137,
      "step": 6960
    },
    {
      "epoch": 4.594594594594595,
      "grad_norm": 0.17836393415927887,
      "learning_rate": 3.552837330675296e-06,
      "loss": 0.0184,
      "step": 6970
    },
    {
      "epoch": 4.601186552406064,
      "grad_norm": 0.2593984603881836,
      "learning_rate": 3.43852996373657e-06,
      "loss": 0.0138,
      "step": 6980
    },
    {
      "epoch": 4.607778510217535,
      "grad_norm": 0.2913285195827484,
      "learning_rate": 3.3260594569636928e-06,
      "loss": 0.0212,
      "step": 6990
    },
    {
      "epoch": 4.614370468029005,
      "grad_norm": 0.18963216245174408,
      "learning_rate": 3.215427949849714e-06,
      "loss": 0.0155,
      "step": 7000
    },
    {
      "epoch": 4.6209624258404745,
      "grad_norm": 0.30186694860458374,
      "learning_rate": 3.1066375469049337e-06,
      "loss": 0.0185,
      "step": 7010
    },
    {
      "epoch": 4.627554383651945,
      "grad_norm": 0.3594430685043335,
      "learning_rate": 2.9996903176168765e-06,
      "loss": 0.0157,
      "step": 7020
    },
    {
      "epoch": 4.634146341463414,
      "grad_norm": 0.407387912273407,
      "learning_rate": 2.8945882964109496e-06,
      "loss": 0.0155,
      "step": 7030
    },
    {
      "epoch": 4.640738299274885,
      "grad_norm": 0.1670001596212387,
      "learning_rate": 2.7913334826116357e-06,
      "loss": 0.0156,
      "step": 7040
    },
    {
      "epoch": 4.647330257086354,
      "grad_norm": 0.3461068272590637,
      "learning_rate": 2.689927840404638e-06,
      "loss": 0.0155,
      "step": 7050
    },
    {
      "epoch": 4.6539222148978245,
      "grad_norm": 0.1870720386505127,
      "learning_rate": 2.590373298799342e-06,
      "loss": 0.0137,
      "step": 7060
    },
    {
      "epoch": 4.660514172709295,
      "grad_norm": 0.5297737717628479,
      "learning_rate": 2.492671751592235e-06,
      "loss": 0.021,
      "step": 7070
    },
    {
      "epoch": 4.667106130520764,
      "grad_norm": 0.41437268257141113,
      "learning_rate": 2.3968250573308424e-06,
      "loss": 0.0166,
      "step": 7080
    },
    {
      "epoch": 4.673698088332235,
      "grad_norm": 0.2162405252456665,
      "learning_rate": 2.302835039278339e-06,
      "loss": 0.0163,
      "step": 7090
    },
    {
      "epoch": 4.680290046143705,
      "grad_norm": 0.3162844479084015,
      "learning_rate": 2.2107034853789288e-06,
      "loss": 0.0184,
      "step": 7100
    },
    {
      "epoch": 4.686882003955175,
      "grad_norm": 0.23974072933197021,
      "learning_rate": 2.1204321482238164e-06,
      "loss": 0.0187,
      "step": 7110
    },
    {
      "epoch": 4.693473961766645,
      "grad_norm": 0.24216875433921814,
      "learning_rate": 2.0320227450178254e-06,
      "loss": 0.0145,
      "step": 7120
    },
    {
      "epoch": 4.7000659195781145,
      "grad_norm": 0.3286508023738861,
      "learning_rate": 1.945476957546788e-06,
      "loss": 0.0189,
      "step": 7130
    },
    {
      "epoch": 4.706657877389585,
      "grad_norm": 0.22018277645111084,
      "learning_rate": 1.860796432145495e-06,
      "loss": 0.0164,
      "step": 7140
    },
    {
      "epoch": 4.713249835201054,
      "grad_norm": 0.18138107657432556,
      "learning_rate": 1.7779827796664538e-06,
      "loss": 0.0173,
      "step": 7150
    },
    {
      "epoch": 4.719841793012525,
      "grad_norm": 0.3609893321990967,
      "learning_rate": 1.6970375754491562e-06,
      "loss": 0.0291,
      "step": 7160
    },
    {
      "epoch": 4.726433750823995,
      "grad_norm": 0.31565043330192566,
      "learning_rate": 1.6179623592901926e-06,
      "loss": 0.014,
      "step": 7170
    },
    {
      "epoch": 4.733025708635465,
      "grad_norm": 0.27240124344825745,
      "learning_rate": 1.5407586354139193e-06,
      "loss": 0.0167,
      "step": 7180
    },
    {
      "epoch": 4.739617666446935,
      "grad_norm": 0.3199063837528229,
      "learning_rate": 1.4654278724438364e-06,
      "loss": 0.0164,
      "step": 7190
    },
    {
      "epoch": 4.746209624258404,
      "grad_norm": 0.23247933387756348,
      "learning_rate": 1.3919715033746893e-06,
      "loss": 0.0195,
      "step": 7200
    },
    {
      "epoch": 4.752801582069875,
      "grad_norm": 0.26770317554473877,
      "learning_rate": 1.3203909255451452e-06,
      "loss": 0.0125,
      "step": 7210
    },
    {
      "epoch": 4.759393539881344,
      "grad_norm": 0.2076646387577057,
      "learning_rate": 1.2506875006113027e-06,
      "loss": 0.0162,
      "step": 7220
    },
    {
      "epoch": 4.765985497692815,
      "grad_norm": 0.1567927598953247,
      "learning_rate": 1.1828625545207027e-06,
      "loss": 0.0142,
      "step": 7230
    },
    {
      "epoch": 4.772577455504285,
      "grad_norm": 0.3224427402019501,
      "learning_rate": 1.1169173774871478e-06,
      "loss": 0.0161,
      "step": 7240
    },
    {
      "epoch": 4.7791694133157545,
      "grad_norm": 0.5948562622070312,
      "learning_rate": 1.0528532239661547e-06,
      "loss": 0.0164,
      "step": 7250
    },
    {
      "epoch": 4.785761371127225,
      "grad_norm": 0.30895039439201355,
      "learning_rate": 9.906713126310974e-07,
      "loss": 0.0171,
      "step": 7260
    },
    {
      "epoch": 4.792353328938695,
      "grad_norm": 0.14259961247444153,
      "learning_rate": 9.303728263500011e-07,
      "loss": 0.0194,
      "step": 7270
    },
    {
      "epoch": 4.798945286750165,
      "grad_norm": 0.15019071102142334,
      "learning_rate": 8.719589121630622e-07,
      "loss": 0.0163,
      "step": 7280
    },
    {
      "epoch": 4.805537244561635,
      "grad_norm": 0.2892571687698364,
      "learning_rate": 8.154306812608315e-07,
      "loss": 0.0173,
      "step": 7290
    },
    {
      "epoch": 4.812129202373105,
      "grad_norm": 0.2563762962818146,
      "learning_rate": 7.607892089630308e-07,
      "loss": 0.0168,
      "step": 7300
    },
    {
      "epoch": 4.818721160184575,
      "grad_norm": 0.2222357541322708,
      "learning_rate": 7.080355346981815e-07,
      "loss": 0.014,
      "step": 7310
    },
    {
      "epoch": 4.8253131179960445,
      "grad_norm": 0.22898784279823303,
      "learning_rate": 6.571706619837526e-07,
      "loss": 0.0135,
      "step": 7320
    },
    {
      "epoch": 4.831905075807515,
      "grad_norm": 0.23187340795993805,
      "learning_rate": 6.081955584071097e-07,
      "loss": 0.0142,
      "step": 7330
    },
    {
      "epoch": 4.838497033618985,
      "grad_norm": 0.3049458861351013,
      "learning_rate": 5.61111155607108e-07,
      "loss": 0.0199,
      "step": 7340
    },
    {
      "epoch": 4.845088991430455,
      "grad_norm": 0.17564386129379272,
      "learning_rate": 5.159183492563613e-07,
      "loss": 0.0151,
      "step": 7350
    },
    {
      "epoch": 4.851680949241925,
      "grad_norm": 0.3510572016239166,
      "learning_rate": 4.7261799904420035e-07,
      "loss": 0.0164,
      "step": 7360
    },
    {
      "epoch": 4.8582729070533945,
      "grad_norm": 0.31466346979141235,
      "learning_rate": 4.3121092866031945e-07,
      "loss": 0.0176,
      "step": 7370
    },
    {
      "epoch": 4.864864864864865,
      "grad_norm": 0.2005147635936737,
      "learning_rate": 3.91697925779122e-07,
      "loss": 0.0168,
      "step": 7380
    },
    {
      "epoch": 4.871456822676334,
      "grad_norm": 0.1678527295589447,
      "learning_rate": 3.5407974204473284e-07,
      "loss": 0.0175,
      "step": 7390
    },
    {
      "epoch": 4.878048780487805,
      "grad_norm": 0.21754373610019684,
      "learning_rate": 3.1835709305668703e-07,
      "loss": 0.0127,
      "step": 7400
    },
    {
      "epoch": 4.884640738299275,
      "grad_norm": 0.21587257087230682,
      "learning_rate": 2.84530658356319e-07,
      "loss": 0.017,
      "step": 7410
    },
    {
      "epoch": 4.891232696110745,
      "grad_norm": 0.31447526812553406,
      "learning_rate": 2.526010814138613e-07,
      "loss": 0.0217,
      "step": 7420
    },
    {
      "epoch": 4.897824653922215,
      "grad_norm": 0.30843478441238403,
      "learning_rate": 2.2256896961616592e-07,
      "loss": 0.0181,
      "step": 7430
    },
    {
      "epoch": 4.904416611733685,
      "grad_norm": 0.29951369762420654,
      "learning_rate": 1.9443489425517992e-07,
      "loss": 0.0152,
      "step": 7440
    },
    {
      "epoch": 4.911008569545155,
      "grad_norm": 0.4117021858692169,
      "learning_rate": 1.6819939051706535e-07,
      "loss": 0.0127,
      "step": 7450
    },
    {
      "epoch": 4.917600527356625,
      "grad_norm": 0.11666778475046158,
      "learning_rate": 1.438629574720074e-07,
      "loss": 0.0144,
      "step": 7460
    },
    {
      "epoch": 4.924192485168095,
      "grad_norm": 0.3991844356060028,
      "learning_rate": 1.2142605806474417e-07,
      "loss": 0.0162,
      "step": 7470
    },
    {
      "epoch": 4.930784442979565,
      "grad_norm": 0.09675031900405884,
      "learning_rate": 1.0088911910576259e-07,
      "loss": 0.0223,
      "step": 7480
    },
    {
      "epoch": 4.937376400791035,
      "grad_norm": 0.3356577157974243,
      "learning_rate": 8.225253126314947e-08,
      "loss": 0.0168,
      "step": 7490
    },
    {
      "epoch": 4.943968358602505,
      "grad_norm": 0.27056625485420227,
      "learning_rate": 6.551664905517508e-08,
      "loss": 0.0166,
      "step": 7500
    },
    {
      "epoch": 4.950560316413975,
      "grad_norm": 0.24081185460090637,
      "learning_rate": 5.068179084355418e-08,
      "loss": 0.0164,
      "step": 7510
    },
    {
      "epoch": 4.957152274225445,
      "grad_norm": 0.3618698716163635,
      "learning_rate": 3.774823882738421e-08,
      "loss": 0.0176,
      "step": 7520
    },
    {
      "epoch": 4.963744232036915,
      "grad_norm": 0.20548762381076813,
      "learning_rate": 2.6716239037805068e-08,
      "loss": 0.0183,
      "step": 7530
    },
    {
      "epoch": 4.970336189848385,
      "grad_norm": 0.24806766211986542,
      "learning_rate": 1.7586001333258495e-08,
      "loss": 0.0156,
      "step": 7540
    },
    {
      "epoch": 4.976928147659855,
      "grad_norm": 0.3018137216567993,
      "learning_rate": 1.0357699395535658e-08,
      "loss": 0.0196,
      "step": 7550
    },
    {
      "epoch": 4.9835201054713245,
      "grad_norm": 0.24933604896068573,
      "learning_rate": 5.031470726490906e-09,
      "loss": 0.015,
      "step": 7560
    },
    {
      "epoch": 4.990112063282795,
      "grad_norm": 0.23485144972801208,
      "learning_rate": 1.6074166453883266e-09,
      "loss": 0.0103,
      "step": 7570
    },
    {
      "epoch": 4.996704021094265,
      "grad_norm": 0.4469901919364929,
      "learning_rate": 8.560228699217021e-11,
      "loss": 0.0147,
      "step": 7580
    },
    {
      "epoch": 4.998681608437706,
      "step": 7583,
      "total_flos": 2.658519488376864e+17,
      "train_loss": 0.03622536294503214,
      "train_runtime": 3445.8975,
      "train_samples_per_second": 35.209,
      "train_steps_per_second": 2.201
    }
  ],
  "logging_steps": 10,
  "max_steps": 7583,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.658519488376864e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}