|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.6397282736003747, |
|
"eval_steps": 500, |
|
"global_step": 7000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0023424689622862497, |
|
"grad_norm": 4.449338436126709, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 1.3236, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.004684937924572499, |
|
"grad_norm": 3.442420721054077, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 1.1552, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007027406886858749, |
|
"grad_norm": 2.458024263381958, |
|
"learning_rate": 6e-06, |
|
"loss": 0.9371, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.009369875849144999, |
|
"grad_norm": 2.4206013679504395, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 1.1333, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.011712344811431248, |
|
"grad_norm": 4.484491348266602, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7988, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.014054813773717497, |
|
"grad_norm": 7.087528228759766, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.8714, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.016397282736003747, |
|
"grad_norm": 2.9479169845581055, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"loss": 0.5826, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.018739751698289998, |
|
"grad_norm": 2.2344982624053955, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 0.457, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02108222066057625, |
|
"grad_norm": 1.1311728954315186, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.3638, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.023424689622862496, |
|
"grad_norm": 5.992610931396484, |
|
"learning_rate": 2e-05, |
|
"loss": 0.7519, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.025767158585148747, |
|
"grad_norm": 1.328804612159729, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 0.369, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.028109627547434995, |
|
"grad_norm": 2.6690480709075928, |
|
"learning_rate": 2.4e-05, |
|
"loss": 0.2072, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.030452096509721246, |
|
"grad_norm": 1.2436017990112305, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"loss": 0.2564, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03279456547200749, |
|
"grad_norm": 2.130502939224243, |
|
"learning_rate": 2.8000000000000003e-05, |
|
"loss": 0.1741, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.035137034434293744, |
|
"grad_norm": 1.1833769083023071, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1982, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.037479503396579995, |
|
"grad_norm": 0.887791633605957, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 0.2346, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.039821972358866246, |
|
"grad_norm": 2.4128785133361816, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"loss": 0.1967, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.0421644413211525, |
|
"grad_norm": 1.2833918333053589, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.1552, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04450691028343874, |
|
"grad_norm": 1.459666132926941, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.2237, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.04684937924572499, |
|
"grad_norm": 1.7674411535263062, |
|
"learning_rate": 4e-05, |
|
"loss": 0.1619, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.049191848208011243, |
|
"grad_norm": 1.2941542863845825, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.184, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.051534317170297494, |
|
"grad_norm": 1.7022488117218018, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.1501, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.053876786132583745, |
|
"grad_norm": 0.8502867221832275, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.2449, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.05621925509486999, |
|
"grad_norm": 2.1729302406311035, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.141, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.05856172405715624, |
|
"grad_norm": 1.9990278482437134, |
|
"learning_rate": 5e-05, |
|
"loss": 0.1569, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06090419301944249, |
|
"grad_norm": 1.0973132848739624, |
|
"learning_rate": 5.2000000000000004e-05, |
|
"loss": 0.1574, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.06324666198172874, |
|
"grad_norm": 1.5121344327926636, |
|
"learning_rate": 5.4000000000000005e-05, |
|
"loss": 0.1309, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.06558913094401499, |
|
"grad_norm": 1.0041357278823853, |
|
"learning_rate": 5.6000000000000006e-05, |
|
"loss": 0.2048, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.06793159990630124, |
|
"grad_norm": 1.9920216798782349, |
|
"learning_rate": 5.8e-05, |
|
"loss": 0.1425, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07027406886858749, |
|
"grad_norm": 0.6136835217475891, |
|
"learning_rate": 6e-05, |
|
"loss": 0.1236, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07261653783087374, |
|
"grad_norm": 1.2063113451004028, |
|
"learning_rate": 6.2e-05, |
|
"loss": 0.1342, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.07495900679315999, |
|
"grad_norm": 0.7644496560096741, |
|
"learning_rate": 6.400000000000001e-05, |
|
"loss": 0.1205, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.07730147575544624, |
|
"grad_norm": 0.973790168762207, |
|
"learning_rate": 6.6e-05, |
|
"loss": 0.1551, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.07964394471773249, |
|
"grad_norm": 1.9004161357879639, |
|
"learning_rate": 6.800000000000001e-05, |
|
"loss": 0.1395, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.08198641368001874, |
|
"grad_norm": 0.8575976490974426, |
|
"learning_rate": 7e-05, |
|
"loss": 0.1081, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.084328882642305, |
|
"grad_norm": 1.3740334510803223, |
|
"learning_rate": 7.2e-05, |
|
"loss": 0.18, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.08667135160459125, |
|
"grad_norm": 0.7421107888221741, |
|
"learning_rate": 7.4e-05, |
|
"loss": 0.1496, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.08901382056687748, |
|
"grad_norm": 1.4952155351638794, |
|
"learning_rate": 7.6e-05, |
|
"loss": 0.1491, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.09135628952916373, |
|
"grad_norm": 1.0072972774505615, |
|
"learning_rate": 7.800000000000001e-05, |
|
"loss": 0.1282, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.09369875849144998, |
|
"grad_norm": 1.719224452972412, |
|
"learning_rate": 8e-05, |
|
"loss": 0.1779, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09604122745373624, |
|
"grad_norm": 1.4302623271942139, |
|
"learning_rate": 8.2e-05, |
|
"loss": 0.1145, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.09838369641602249, |
|
"grad_norm": 0.6622968316078186, |
|
"learning_rate": 8.4e-05, |
|
"loss": 0.1159, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.10072616537830874, |
|
"grad_norm": 1.0967049598693848, |
|
"learning_rate": 8.6e-05, |
|
"loss": 0.1659, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.10306863434059499, |
|
"grad_norm": 1.1332488059997559, |
|
"learning_rate": 8.800000000000001e-05, |
|
"loss": 0.1292, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.10541110330288124, |
|
"grad_norm": 1.308289647102356, |
|
"learning_rate": 9e-05, |
|
"loss": 0.1202, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.10775357226516749, |
|
"grad_norm": 0.5696719884872437, |
|
"learning_rate": 9.200000000000001e-05, |
|
"loss": 0.1118, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.11009604122745374, |
|
"grad_norm": 0.9922944903373718, |
|
"learning_rate": 9.4e-05, |
|
"loss": 0.1644, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.11243851018973998, |
|
"grad_norm": 1.5004724264144897, |
|
"learning_rate": 9.6e-05, |
|
"loss": 0.2011, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.11478097915202623, |
|
"grad_norm": 0.9503705501556396, |
|
"learning_rate": 9.8e-05, |
|
"loss": 0.1038, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.11712344811431248, |
|
"grad_norm": 1.421077013015747, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0944, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11946591707659873, |
|
"grad_norm": 0.8938995599746704, |
|
"learning_rate": 9.999972660400536e-05, |
|
"loss": 0.1216, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.12180838603888498, |
|
"grad_norm": 0.46683940291404724, |
|
"learning_rate": 9.999890641901125e-05, |
|
"loss": 0.1278, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.12415085500117123, |
|
"grad_norm": 0.8092114925384521, |
|
"learning_rate": 9.999753945398704e-05, |
|
"loss": 0.0794, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.12649332396345747, |
|
"grad_norm": 0.27710163593292236, |
|
"learning_rate": 9.99956257238817e-05, |
|
"loss": 0.1266, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.12883579292574374, |
|
"grad_norm": 0.81737220287323, |
|
"learning_rate": 9.999316524962345e-05, |
|
"loss": 0.1108, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.13117826188802997, |
|
"grad_norm": 0.6735175848007202, |
|
"learning_rate": 9.999015805811965e-05, |
|
"loss": 0.0854, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.13352073085031624, |
|
"grad_norm": 0.2487485110759735, |
|
"learning_rate": 9.998660418225645e-05, |
|
"loss": 0.1045, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.13586319981260248, |
|
"grad_norm": 0.3255215287208557, |
|
"learning_rate": 9.998250366089848e-05, |
|
"loss": 0.0948, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.13820566877488874, |
|
"grad_norm": 0.7749798893928528, |
|
"learning_rate": 9.997785653888835e-05, |
|
"loss": 0.0775, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.14054813773717498, |
|
"grad_norm": 1.220957636833191, |
|
"learning_rate": 9.997266286704631e-05, |
|
"loss": 0.1201, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.14289060669946124, |
|
"grad_norm": 0.8066214919090271, |
|
"learning_rate": 9.996692270216947e-05, |
|
"loss": 0.0815, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.14523307566174748, |
|
"grad_norm": 0.6408377885818481, |
|
"learning_rate": 9.996063610703137e-05, |
|
"loss": 0.1, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.14757554462403374, |
|
"grad_norm": 0.8596289753913879, |
|
"learning_rate": 9.995380315038119e-05, |
|
"loss": 0.1008, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.14991801358631998, |
|
"grad_norm": 0.972243070602417, |
|
"learning_rate": 9.994642390694308e-05, |
|
"loss": 0.1075, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.15226048254860622, |
|
"grad_norm": 0.5220253467559814, |
|
"learning_rate": 9.993849845741524e-05, |
|
"loss": 0.1021, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.15460295151089248, |
|
"grad_norm": 0.5453582406044006, |
|
"learning_rate": 9.993002688846913e-05, |
|
"loss": 0.0963, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.15694542047317872, |
|
"grad_norm": 0.24789837002754211, |
|
"learning_rate": 9.992100929274846e-05, |
|
"loss": 0.0712, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.15928788943546499, |
|
"grad_norm": 0.31857672333717346, |
|
"learning_rate": 9.991144576886823e-05, |
|
"loss": 0.0859, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.16163035839775122, |
|
"grad_norm": 0.7285981178283691, |
|
"learning_rate": 9.990133642141359e-05, |
|
"loss": 0.1274, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.1639728273600375, |
|
"grad_norm": 1.0549755096435547, |
|
"learning_rate": 9.989068136093873e-05, |
|
"loss": 0.1187, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.16631529632232372, |
|
"grad_norm": 0.204506054520607, |
|
"learning_rate": 9.987948070396571e-05, |
|
"loss": 0.1005, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.16865776528461, |
|
"grad_norm": 0.4295964241027832, |
|
"learning_rate": 9.986773457298311e-05, |
|
"loss": 0.0937, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.17100023424689623, |
|
"grad_norm": 1.0681158304214478, |
|
"learning_rate": 9.985544309644475e-05, |
|
"loss": 0.0855, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.1733427032091825, |
|
"grad_norm": 0.667492151260376, |
|
"learning_rate": 9.984260640876821e-05, |
|
"loss": 0.1096, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.17568517217146873, |
|
"grad_norm": 0.6995371580123901, |
|
"learning_rate": 9.98292246503335e-05, |
|
"loss": 0.108, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17802764113375497, |
|
"grad_norm": 0.9727945923805237, |
|
"learning_rate": 9.981529796748134e-05, |
|
"loss": 0.1155, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.18037011009604123, |
|
"grad_norm": 0.3702404201030731, |
|
"learning_rate": 9.980082651251175e-05, |
|
"loss": 0.0846, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.18271257905832747, |
|
"grad_norm": 0.3169856667518616, |
|
"learning_rate": 9.97858104436822e-05, |
|
"loss": 0.0917, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.18505504802061373, |
|
"grad_norm": 0.6973789930343628, |
|
"learning_rate": 9.977024992520602e-05, |
|
"loss": 0.0785, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.18739751698289997, |
|
"grad_norm": 0.5686987042427063, |
|
"learning_rate": 9.975414512725057e-05, |
|
"loss": 0.1015, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.18973998594518623, |
|
"grad_norm": 0.6190043687820435, |
|
"learning_rate": 9.973749622593534e-05, |
|
"loss": 0.0753, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.19208245490747247, |
|
"grad_norm": 0.3807699382305145, |
|
"learning_rate": 9.972030340333001e-05, |
|
"loss": 0.0734, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.19442492386975874, |
|
"grad_norm": 0.45342546701431274, |
|
"learning_rate": 9.970256684745258e-05, |
|
"loss": 0.1012, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.19676739283204497, |
|
"grad_norm": 0.2780962586402893, |
|
"learning_rate": 9.968428675226714e-05, |
|
"loss": 0.0757, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.19910986179433124, |
|
"grad_norm": 0.20734530687332153, |
|
"learning_rate": 9.966546331768191e-05, |
|
"loss": 0.0751, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.20145233075661748, |
|
"grad_norm": 0.3406268358230591, |
|
"learning_rate": 9.964609674954696e-05, |
|
"loss": 0.0937, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.2037947997189037, |
|
"grad_norm": 0.33824971318244934, |
|
"learning_rate": 9.962618725965196e-05, |
|
"loss": 0.0913, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.20613726868118998, |
|
"grad_norm": 0.5773669481277466, |
|
"learning_rate": 9.96057350657239e-05, |
|
"loss": 0.0834, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.20847973764347622, |
|
"grad_norm": 0.5624499917030334, |
|
"learning_rate": 9.95847403914247e-05, |
|
"loss": 0.1001, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.21082220660576248, |
|
"grad_norm": 0.5361132025718689, |
|
"learning_rate": 9.956320346634876e-05, |
|
"loss": 0.1233, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.21316467556804872, |
|
"grad_norm": 0.4824270009994507, |
|
"learning_rate": 9.954112452602045e-05, |
|
"loss": 0.0882, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.21550714453033498, |
|
"grad_norm": 0.6482338905334473, |
|
"learning_rate": 9.95185038118915e-05, |
|
"loss": 0.0647, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.21784961349262122, |
|
"grad_norm": 0.2783452868461609, |
|
"learning_rate": 9.949534157133844e-05, |
|
"loss": 0.0917, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.22019208245490748, |
|
"grad_norm": 0.4593198597431183, |
|
"learning_rate": 9.94716380576598e-05, |
|
"loss": 0.068, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.22253455141719372, |
|
"grad_norm": 0.7751959562301636, |
|
"learning_rate": 9.944739353007344e-05, |
|
"loss": 0.1032, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.22487702037947996, |
|
"grad_norm": 0.3963168263435364, |
|
"learning_rate": 9.942260825371358e-05, |
|
"loss": 0.0942, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.22721948934176622, |
|
"grad_norm": 0.40413302183151245, |
|
"learning_rate": 9.939728249962807e-05, |
|
"loss": 0.0736, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.22956195830405246, |
|
"grad_norm": 0.3862430155277252, |
|
"learning_rate": 9.937141654477528e-05, |
|
"loss": 0.0726, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.23190442726633873, |
|
"grad_norm": 0.5864925384521484, |
|
"learning_rate": 9.934501067202117e-05, |
|
"loss": 0.0872, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.23424689622862496, |
|
"grad_norm": 0.31625375151634216, |
|
"learning_rate": 9.931806517013612e-05, |
|
"loss": 0.0708, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23658936519091123, |
|
"grad_norm": 0.5403046011924744, |
|
"learning_rate": 9.929058033379181e-05, |
|
"loss": 0.073, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.23893183415319746, |
|
"grad_norm": 0.4366021156311035, |
|
"learning_rate": 9.926255646355804e-05, |
|
"loss": 0.0643, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.24127430311548373, |
|
"grad_norm": 0.500108540058136, |
|
"learning_rate": 9.923399386589933e-05, |
|
"loss": 0.0437, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.24361677207776997, |
|
"grad_norm": 0.8096440434455872, |
|
"learning_rate": 9.92048928531717e-05, |
|
"loss": 0.0555, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.24595924104005623, |
|
"grad_norm": 0.6826971173286438, |
|
"learning_rate": 9.917525374361912e-05, |
|
"loss": 0.0704, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.24830171000234247, |
|
"grad_norm": 0.27831944823265076, |
|
"learning_rate": 9.914507686137019e-05, |
|
"loss": 0.0659, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.2506441789646287, |
|
"grad_norm": 0.35980355739593506, |
|
"learning_rate": 9.911436253643445e-05, |
|
"loss": 0.0652, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.25298664792691494, |
|
"grad_norm": 0.7075427174568176, |
|
"learning_rate": 9.90831111046988e-05, |
|
"loss": 0.0933, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.25532911688920124, |
|
"grad_norm": 0.33446595072746277, |
|
"learning_rate": 9.905132290792394e-05, |
|
"loss": 0.0594, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.2576715858514875, |
|
"grad_norm": 0.21890777349472046, |
|
"learning_rate": 9.901899829374047e-05, |
|
"loss": 0.0636, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2600140548137737, |
|
"grad_norm": 0.19606763124465942, |
|
"learning_rate": 9.89861376156452e-05, |
|
"loss": 0.0573, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.26235652377605995, |
|
"grad_norm": 0.40309399366378784, |
|
"learning_rate": 9.895274123299723e-05, |
|
"loss": 0.0711, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.26469899273834624, |
|
"grad_norm": 0.15657459199428558, |
|
"learning_rate": 9.891880951101407e-05, |
|
"loss": 0.0596, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.2670414617006325, |
|
"grad_norm": 0.5244103670120239, |
|
"learning_rate": 9.888434282076758e-05, |
|
"loss": 0.0624, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.2693839306629187, |
|
"grad_norm": 0.6240133047103882, |
|
"learning_rate": 9.884934153917997e-05, |
|
"loss": 0.1013, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.27172639962520495, |
|
"grad_norm": 0.2892966568470001, |
|
"learning_rate": 9.881380604901964e-05, |
|
"loss": 0.0886, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.27406886858749124, |
|
"grad_norm": 0.11301174759864807, |
|
"learning_rate": 9.877773673889701e-05, |
|
"loss": 0.0967, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.2764113375497775, |
|
"grad_norm": 0.6525554060935974, |
|
"learning_rate": 9.87411340032603e-05, |
|
"loss": 0.0857, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.2787538065120637, |
|
"grad_norm": 0.27176904678344727, |
|
"learning_rate": 9.870399824239117e-05, |
|
"loss": 0.0556, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.28109627547434995, |
|
"grad_norm": 0.4166867136955261, |
|
"learning_rate": 9.86663298624003e-05, |
|
"loss": 0.0684, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2834387444366362, |
|
"grad_norm": 0.19580566883087158, |
|
"learning_rate": 9.862812927522309e-05, |
|
"loss": 0.0882, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.2857812133989225, |
|
"grad_norm": 0.44604888558387756, |
|
"learning_rate": 9.858939689861506e-05, |
|
"loss": 0.0883, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.2881236823612087, |
|
"grad_norm": 0.49636200070381165, |
|
"learning_rate": 9.855013315614725e-05, |
|
"loss": 0.0912, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.29046615132349496, |
|
"grad_norm": 0.1988007128238678, |
|
"learning_rate": 9.851033847720166e-05, |
|
"loss": 0.0719, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.2928086202857812, |
|
"grad_norm": 0.30095556378364563, |
|
"learning_rate": 9.847001329696653e-05, |
|
"loss": 0.078, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.2951510892480675, |
|
"grad_norm": 0.34190279245376587, |
|
"learning_rate": 9.842915805643155e-05, |
|
"loss": 0.0442, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.2974935582103537, |
|
"grad_norm": 0.25464609265327454, |
|
"learning_rate": 9.838777320238312e-05, |
|
"loss": 0.0583, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.29983602717263996, |
|
"grad_norm": 0.07694657146930695, |
|
"learning_rate": 9.834585918739936e-05, |
|
"loss": 0.0359, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.3021784961349262, |
|
"grad_norm": 0.19848985970020294, |
|
"learning_rate": 9.830341646984521e-05, |
|
"loss": 0.0812, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.30452096509721244, |
|
"grad_norm": 0.27825915813446045, |
|
"learning_rate": 9.826044551386744e-05, |
|
"loss": 0.0496, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.30686343405949873, |
|
"grad_norm": 0.3718523681163788, |
|
"learning_rate": 9.821694678938953e-05, |
|
"loss": 0.0671, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.30920590302178497, |
|
"grad_norm": 0.5311722159385681, |
|
"learning_rate": 9.817292077210659e-05, |
|
"loss": 0.0739, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.3115483719840712, |
|
"grad_norm": 0.41185882687568665, |
|
"learning_rate": 9.812836794348004e-05, |
|
"loss": 0.0665, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.31389084094635744, |
|
"grad_norm": 0.2839798629283905, |
|
"learning_rate": 9.808328879073251e-05, |
|
"loss": 0.0495, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.31623330990864373, |
|
"grad_norm": 0.5456023812294006, |
|
"learning_rate": 9.803768380684242e-05, |
|
"loss": 0.0538, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.31857577887092997, |
|
"grad_norm": 1.1303348541259766, |
|
"learning_rate": 9.799155349053851e-05, |
|
"loss": 0.0948, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.3209182478332162, |
|
"grad_norm": 0.3756462633609772, |
|
"learning_rate": 9.794489834629455e-05, |
|
"loss": 0.0405, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.32326071679550245, |
|
"grad_norm": 0.45304539799690247, |
|
"learning_rate": 9.789771888432375e-05, |
|
"loss": 0.0518, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.3256031857577887, |
|
"grad_norm": 0.42578068375587463, |
|
"learning_rate": 9.785001562057309e-05, |
|
"loss": 0.0694, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.327945654720075, |
|
"grad_norm": 0.5314955711364746, |
|
"learning_rate": 9.780178907671789e-05, |
|
"loss": 0.0656, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.3302881236823612, |
|
"grad_norm": 0.445273220539093, |
|
"learning_rate": 9.775303978015585e-05, |
|
"loss": 0.0467, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.33263059264464745, |
|
"grad_norm": 0.45427191257476807, |
|
"learning_rate": 9.77037682640015e-05, |
|
"loss": 0.071, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.3349730616069337, |
|
"grad_norm": 1.1310575008392334, |
|
"learning_rate": 9.765397506708023e-05, |
|
"loss": 0.0783, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.33731553056922, |
|
"grad_norm": 0.37553080916404724, |
|
"learning_rate": 9.760366073392246e-05, |
|
"loss": 0.0595, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.3396579995315062, |
|
"grad_norm": 0.456626296043396, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 0.0684, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.34200046849379245, |
|
"grad_norm": 0.23000092804431915, |
|
"learning_rate": 9.750147086550844e-05, |
|
"loss": 0.0663, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.3443429374560787, |
|
"grad_norm": 0.8536004424095154, |
|
"learning_rate": 9.744959644778422e-05, |
|
"loss": 0.0615, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.346685406418365, |
|
"grad_norm": 0.2810976803302765, |
|
"learning_rate": 9.739720312887535e-05, |
|
"loss": 0.0499, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.3490278753806512, |
|
"grad_norm": 0.5517282485961914, |
|
"learning_rate": 9.734429148174675e-05, |
|
"loss": 0.0623, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.35137034434293746, |
|
"grad_norm": 0.5391654372215271, |
|
"learning_rate": 9.729086208503174e-05, |
|
"loss": 0.0701, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3537128133052237, |
|
"grad_norm": 0.2104485183954239, |
|
"learning_rate": 9.723691552302562e-05, |
|
"loss": 0.0624, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.35605528226750993, |
|
"grad_norm": 0.6778100728988647, |
|
"learning_rate": 9.718245238567939e-05, |
|
"loss": 0.0735, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.3583977512297962, |
|
"grad_norm": 0.5578711628913879, |
|
"learning_rate": 9.712747326859315e-05, |
|
"loss": 0.0649, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.36074022019208246, |
|
"grad_norm": 0.19399204850196838, |
|
"learning_rate": 9.707197877300974e-05, |
|
"loss": 0.0696, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.3630826891543687, |
|
"grad_norm": 0.36409327387809753, |
|
"learning_rate": 9.701596950580806e-05, |
|
"loss": 0.0764, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.36542515811665494, |
|
"grad_norm": 0.3991371691226959, |
|
"learning_rate": 9.695944607949649e-05, |
|
"loss": 0.053, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.36776762707894123, |
|
"grad_norm": 0.24415276944637299, |
|
"learning_rate": 9.690240911220618e-05, |
|
"loss": 0.0359, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.37011009604122747, |
|
"grad_norm": 0.2075069695711136, |
|
"learning_rate": 9.684485922768422e-05, |
|
"loss": 0.0663, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.3724525650035137, |
|
"grad_norm": 0.6543785333633423, |
|
"learning_rate": 9.6786797055287e-05, |
|
"loss": 0.0494, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.37479503396579994, |
|
"grad_norm": 0.5545148253440857, |
|
"learning_rate": 9.672822322997305e-05, |
|
"loss": 0.0922, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.3771375029280862, |
|
"grad_norm": 0.3024766743183136, |
|
"learning_rate": 9.66691383922964e-05, |
|
"loss": 0.0458, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.37947997189037247, |
|
"grad_norm": 0.18543019890785217, |
|
"learning_rate": 9.660954318839933e-05, |
|
"loss": 0.0814, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.3818224408526587, |
|
"grad_norm": 0.6047130823135376, |
|
"learning_rate": 9.654943827000548e-05, |
|
"loss": 0.0749, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.38416490981494494, |
|
"grad_norm": 0.5619345307350159, |
|
"learning_rate": 9.648882429441257e-05, |
|
"loss": 0.0647, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.3865073787772312, |
|
"grad_norm": 0.3835267126560211, |
|
"learning_rate": 9.642770192448536e-05, |
|
"loss": 0.0526, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.3888498477395175, |
|
"grad_norm": 0.2994864583015442, |
|
"learning_rate": 9.636607182864827e-05, |
|
"loss": 0.0451, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.3911923167018037, |
|
"grad_norm": 0.5770288705825806, |
|
"learning_rate": 9.630393468087818e-05, |
|
"loss": 0.0716, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.39353478566408995, |
|
"grad_norm": 0.3165629506111145, |
|
"learning_rate": 9.624129116069694e-05, |
|
"loss": 0.0468, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.3958772546263762, |
|
"grad_norm": 0.11682554334402084, |
|
"learning_rate": 9.617814195316411e-05, |
|
"loss": 0.0669, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.3982197235886625, |
|
"grad_norm": 0.4979915916919708, |
|
"learning_rate": 9.611448774886924e-05, |
|
"loss": 0.0553, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.4005621925509487, |
|
"grad_norm": 0.14603012800216675, |
|
"learning_rate": 9.605032924392457e-05, |
|
"loss": 0.0597, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.40290466151323495, |
|
"grad_norm": 0.3345795273780823, |
|
"learning_rate": 9.598566713995718e-05, |
|
"loss": 0.049, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.4052471304755212, |
|
"grad_norm": 0.4213583171367645, |
|
"learning_rate": 9.59205021441015e-05, |
|
"loss": 0.0659, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.4075895994378074, |
|
"grad_norm": 0.1514274775981903, |
|
"learning_rate": 9.58548349689915e-05, |
|
"loss": 0.0803, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.4099320684000937, |
|
"grad_norm": 1.1298153400421143, |
|
"learning_rate": 9.578866633275288e-05, |
|
"loss": 0.0574, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.41227453736237996, |
|
"grad_norm": 0.2879124581813812, |
|
"learning_rate": 9.572199695899522e-05, |
|
"loss": 0.0618, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.4146170063246662, |
|
"grad_norm": 0.21584849059581757, |
|
"learning_rate": 9.565482757680415e-05, |
|
"loss": 0.069, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.41695947528695243, |
|
"grad_norm": 0.27666664123535156, |
|
"learning_rate": 9.558715892073323e-05, |
|
"loss": 0.0619, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.4193019442492387, |
|
"grad_norm": 0.36067232489585876, |
|
"learning_rate": 9.551899173079607e-05, |
|
"loss": 0.0512, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.42164441321152496, |
|
"grad_norm": 0.21706882119178772, |
|
"learning_rate": 9.545032675245813e-05, |
|
"loss": 0.0399, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.4239868821738112, |
|
"grad_norm": 0.2502746880054474, |
|
"learning_rate": 9.538116473662861e-05, |
|
"loss": 0.067, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.42632935113609743, |
|
"grad_norm": 0.19951611757278442, |
|
"learning_rate": 9.531150643965223e-05, |
|
"loss": 0.0572, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.42867182009838367, |
|
"grad_norm": 0.5946075916290283, |
|
"learning_rate": 9.524135262330098e-05, |
|
"loss": 0.0556, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.43101428906066996, |
|
"grad_norm": 0.20143412053585052, |
|
"learning_rate": 9.517070405476575e-05, |
|
"loss": 0.0556, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.4333567580229562, |
|
"grad_norm": 0.30480778217315674, |
|
"learning_rate": 9.509956150664796e-05, |
|
"loss": 0.0721, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.43569922698524244, |
|
"grad_norm": 0.289962500333786, |
|
"learning_rate": 9.502792575695112e-05, |
|
"loss": 0.0349, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.4380416959475287, |
|
"grad_norm": 0.23470467329025269, |
|
"learning_rate": 9.49557975890723e-05, |
|
"loss": 0.0508, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.44038416490981497, |
|
"grad_norm": 0.5040431022644043, |
|
"learning_rate": 9.488317779179361e-05, |
|
"loss": 0.0576, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.4427266338721012, |
|
"grad_norm": 0.4373694360256195, |
|
"learning_rate": 9.481006715927351e-05, |
|
"loss": 0.0526, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.44506910283438744, |
|
"grad_norm": 0.41776043176651, |
|
"learning_rate": 9.473646649103818e-05, |
|
"loss": 0.0417, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.4474115717966737, |
|
"grad_norm": 0.5410218238830566, |
|
"learning_rate": 9.46623765919727e-05, |
|
"loss": 0.0737, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.4497540407589599, |
|
"grad_norm": 0.4274581968784332, |
|
"learning_rate": 9.458779827231237e-05, |
|
"loss": 0.0715, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.4520965097212462, |
|
"grad_norm": 0.31722667813301086, |
|
"learning_rate": 9.451273234763371e-05, |
|
"loss": 0.0672, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.45443897868353245, |
|
"grad_norm": 0.221653014421463, |
|
"learning_rate": 9.443717963884569e-05, |
|
"loss": 0.0631, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.4567814476458187, |
|
"grad_norm": 0.2043227255344391, |
|
"learning_rate": 9.43611409721806e-05, |
|
"loss": 0.0436, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.4591239166081049, |
|
"grad_norm": 0.1967364400625229, |
|
"learning_rate": 9.428461717918511e-05, |
|
"loss": 0.0601, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.4614663855703912, |
|
"grad_norm": 0.23282958567142487, |
|
"learning_rate": 9.420760909671118e-05, |
|
"loss": 0.0441, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.46380885453267745, |
|
"grad_norm": 0.6064874529838562, |
|
"learning_rate": 9.413011756690685e-05, |
|
"loss": 0.0691, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.4661513234949637, |
|
"grad_norm": 0.29970476031303406, |
|
"learning_rate": 9.405214343720707e-05, |
|
"loss": 0.0362, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.4684937924572499, |
|
"grad_norm": 0.3310692310333252, |
|
"learning_rate": 9.397368756032445e-05, |
|
"loss": 0.045, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4708362614195362, |
|
"grad_norm": 0.34072744846343994, |
|
"learning_rate": 9.389475079423988e-05, |
|
"loss": 0.0646, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.47317873038182245, |
|
"grad_norm": 0.09513302892446518, |
|
"learning_rate": 9.381533400219318e-05, |
|
"loss": 0.0543, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.4755211993441087, |
|
"grad_norm": 0.19264456629753113, |
|
"learning_rate": 9.373543805267368e-05, |
|
"loss": 0.0682, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.47786366830639493, |
|
"grad_norm": 0.3914099633693695, |
|
"learning_rate": 9.365506381941066e-05, |
|
"loss": 0.0455, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.48020613726868117, |
|
"grad_norm": 0.4226783514022827, |
|
"learning_rate": 9.357421218136386e-05, |
|
"loss": 0.0689, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.48254860623096746, |
|
"grad_norm": 0.41455796360969543, |
|
"learning_rate": 9.349288402271388e-05, |
|
"loss": 0.0596, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.4848910751932537, |
|
"grad_norm": 0.2510756254196167, |
|
"learning_rate": 9.341108023285238e-05, |
|
"loss": 0.0341, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.48723354415553993, |
|
"grad_norm": 0.40096133947372437, |
|
"learning_rate": 9.332880170637252e-05, |
|
"loss": 0.0813, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.48957601311782617, |
|
"grad_norm": 0.6878464221954346, |
|
"learning_rate": 9.32460493430591e-05, |
|
"loss": 0.044, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.49191848208011246, |
|
"grad_norm": 0.3416203558444977, |
|
"learning_rate": 9.316282404787871e-05, |
|
"loss": 0.0686, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.4942609510423987, |
|
"grad_norm": 0.12535825371742249, |
|
"learning_rate": 9.30791267309698e-05, |
|
"loss": 0.0354, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.49660342000468494, |
|
"grad_norm": 0.19023941457271576, |
|
"learning_rate": 9.299495830763286e-05, |
|
"loss": 0.0376, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.4989458889669712, |
|
"grad_norm": 0.3778730034828186, |
|
"learning_rate": 9.291031969832026e-05, |
|
"loss": 0.0518, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.5012883579292574, |
|
"grad_norm": 0.256195068359375, |
|
"learning_rate": 9.282521182862629e-05, |
|
"loss": 0.0571, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.5036308268915437, |
|
"grad_norm": 0.19933399558067322, |
|
"learning_rate": 9.273963562927695e-05, |
|
"loss": 0.0271, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.5059732958538299, |
|
"grad_norm": 0.06613205373287201, |
|
"learning_rate": 9.265359203611987e-05, |
|
"loss": 0.0334, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.5083157648161162, |
|
"grad_norm": 0.21248801052570343, |
|
"learning_rate": 9.256708199011401e-05, |
|
"loss": 0.0746, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.5106582337784025, |
|
"grad_norm": 0.3601578176021576, |
|
"learning_rate": 9.248010643731935e-05, |
|
"loss": 0.076, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.5130007027406887, |
|
"grad_norm": 0.0984947606921196, |
|
"learning_rate": 9.239266632888659e-05, |
|
"loss": 0.0892, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.515343171702975, |
|
"grad_norm": 0.13032953441143036, |
|
"learning_rate": 9.230476262104677e-05, |
|
"loss": 0.039, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5176856406652612, |
|
"grad_norm": 0.48068541288375854, |
|
"learning_rate": 9.221639627510076e-05, |
|
"loss": 0.0585, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.5200281096275474, |
|
"grad_norm": 0.42812222242355347, |
|
"learning_rate": 9.212756825740873e-05, |
|
"loss": 0.0929, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.5223705785898337, |
|
"grad_norm": 0.3526000380516052, |
|
"learning_rate": 9.20382795393797e-05, |
|
"loss": 0.0657, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.5247130475521199, |
|
"grad_norm": 0.14142726361751556, |
|
"learning_rate": 9.194853109746074e-05, |
|
"loss": 0.0571, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.5270555165144062, |
|
"grad_norm": 0.10022013634443283, |
|
"learning_rate": 9.185832391312644e-05, |
|
"loss": 0.0362, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.5293979854766925, |
|
"grad_norm": 0.18126869201660156, |
|
"learning_rate": 9.176765897286813e-05, |
|
"loss": 0.0616, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.5317404544389787, |
|
"grad_norm": 0.22198501229286194, |
|
"learning_rate": 9.167653726818305e-05, |
|
"loss": 0.0227, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.534082923401265, |
|
"grad_norm": 0.07468587905168533, |
|
"learning_rate": 9.158495979556358e-05, |
|
"loss": 0.045, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5364253923635511, |
|
"grad_norm": 0.1882839947938919, |
|
"learning_rate": 9.14929275564863e-05, |
|
"loss": 0.0569, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.5387678613258374, |
|
"grad_norm": 0.1339283585548401, |
|
"learning_rate": 9.140044155740101e-05, |
|
"loss": 0.0692, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.5411103302881237, |
|
"grad_norm": 0.19089505076408386, |
|
"learning_rate": 9.130750280971978e-05, |
|
"loss": 0.0638, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.5434527992504099, |
|
"grad_norm": 0.131087064743042, |
|
"learning_rate": 9.121411232980588e-05, |
|
"loss": 0.0656, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.5457952682126962, |
|
"grad_norm": 0.24333599209785461, |
|
"learning_rate": 9.112027113896262e-05, |
|
"loss": 0.0617, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.5481377371749825, |
|
"grad_norm": 0.4338069260120392, |
|
"learning_rate": 9.102598026342222e-05, |
|
"loss": 0.0384, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.5504802061372687, |
|
"grad_norm": 0.3546713888645172, |
|
"learning_rate": 9.093124073433463e-05, |
|
"loss": 0.0594, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.552822675099555, |
|
"grad_norm": 0.1043967604637146, |
|
"learning_rate": 9.083605358775612e-05, |
|
"loss": 0.0482, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.5551651440618411, |
|
"grad_norm": 0.16685545444488525, |
|
"learning_rate": 9.074041986463808e-05, |
|
"loss": 0.0439, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.5575076130241274, |
|
"grad_norm": 0.15651892125606537, |
|
"learning_rate": 9.064434061081562e-05, |
|
"loss": 0.0542, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.5598500819864137, |
|
"grad_norm": 0.33224546909332275, |
|
"learning_rate": 9.0547816876996e-05, |
|
"loss": 0.0772, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.5621925509486999, |
|
"grad_norm": 0.3219659626483917, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 0.0347, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.5645350199109862, |
|
"grad_norm": 0.3930731415748596, |
|
"learning_rate": 9.035344019648702e-05, |
|
"loss": 0.0386, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.5668774888732724, |
|
"grad_norm": 0.13527953624725342, |
|
"learning_rate": 9.025558937546988e-05, |
|
"loss": 0.0479, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.5692199578355587, |
|
"grad_norm": 0.1432938128709793, |
|
"learning_rate": 9.015729832577681e-05, |
|
"loss": 0.0319, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.571562426797845, |
|
"grad_norm": 0.25687897205352783, |
|
"learning_rate": 9.005856812230304e-05, |
|
"loss": 0.0387, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.5739048957601312, |
|
"grad_norm": 0.31300991773605347, |
|
"learning_rate": 8.995939984474624e-05, |
|
"loss": 0.0574, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.5762473647224174, |
|
"grad_norm": 0.25793933868408203, |
|
"learning_rate": 8.98597945775948e-05, |
|
"loss": 0.0415, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.5785898336847036, |
|
"grad_norm": 0.13978935778141022, |
|
"learning_rate": 8.975975341011596e-05, |
|
"loss": 0.0366, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.5809323026469899, |
|
"grad_norm": 0.20552988350391388, |
|
"learning_rate": 8.965927743634391e-05, |
|
"loss": 0.0519, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.5832747716092762, |
|
"grad_norm": 0.0843147486448288, |
|
"learning_rate": 8.955836775506776e-05, |
|
"loss": 0.0434, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.5856172405715624, |
|
"grad_norm": 0.519131600856781, |
|
"learning_rate": 8.945702546981969e-05, |
|
"loss": 0.044, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.5879597095338487, |
|
"grad_norm": 0.20150704681873322, |
|
"learning_rate": 8.935525168886262e-05, |
|
"loss": 0.0486, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.590302178496135, |
|
"grad_norm": 0.6557456851005554, |
|
"learning_rate": 8.92530475251784e-05, |
|
"loss": 0.0444, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.5926446474584212, |
|
"grad_norm": 0.48158717155456543, |
|
"learning_rate": 8.91504140964553e-05, |
|
"loss": 0.0512, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.5949871164207075, |
|
"grad_norm": 0.3636298179626465, |
|
"learning_rate": 8.90473525250761e-05, |
|
"loss": 0.052, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.5973295853829936, |
|
"grad_norm": 0.1767117828130722, |
|
"learning_rate": 8.894386393810563e-05, |
|
"loss": 0.0534, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.5996720543452799, |
|
"grad_norm": 0.30989664793014526, |
|
"learning_rate": 8.883994946727849e-05, |
|
"loss": 0.0765, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.6020145233075662, |
|
"grad_norm": 0.28089532256126404, |
|
"learning_rate": 8.873561024898668e-05, |
|
"loss": 0.0424, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.6043569922698524, |
|
"grad_norm": 0.5266916751861572, |
|
"learning_rate": 8.863084742426719e-05, |
|
"loss": 0.0364, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.6066994612321387, |
|
"grad_norm": 0.5653497576713562, |
|
"learning_rate": 8.852566213878947e-05, |
|
"loss": 0.0604, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.6090419301944249, |
|
"grad_norm": 0.34995973110198975, |
|
"learning_rate": 8.842005554284296e-05, |
|
"loss": 0.0386, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6113843991567112, |
|
"grad_norm": 0.42935842275619507, |
|
"learning_rate": 8.831402879132446e-05, |
|
"loss": 0.0595, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.6137268681189975, |
|
"grad_norm": 0.19672085344791412, |
|
"learning_rate": 8.820758304372557e-05, |
|
"loss": 0.0426, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.6160693370812836, |
|
"grad_norm": 0.17344583570957184, |
|
"learning_rate": 8.810071946411989e-05, |
|
"loss": 0.0979, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.6184118060435699, |
|
"grad_norm": 0.19755525887012482, |
|
"learning_rate": 8.799343922115044e-05, |
|
"loss": 0.0322, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.6207542750058562, |
|
"grad_norm": 0.33817166090011597, |
|
"learning_rate": 8.788574348801675e-05, |
|
"loss": 0.0375, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.6230967439681424, |
|
"grad_norm": 0.44614845514297485, |
|
"learning_rate": 8.77776334424621e-05, |
|
"loss": 0.054, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.6254392129304287, |
|
"grad_norm": 0.4128440022468567, |
|
"learning_rate": 8.766911026676064e-05, |
|
"loss": 0.0422, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.6277816818927149, |
|
"grad_norm": 0.22449485957622528, |
|
"learning_rate": 8.756017514770443e-05, |
|
"loss": 0.037, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.6301241508550012, |
|
"grad_norm": 0.2689172029495239, |
|
"learning_rate": 8.745082927659047e-05, |
|
"loss": 0.0353, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.6324666198172875, |
|
"grad_norm": 0.05075841769576073, |
|
"learning_rate": 8.73410738492077e-05, |
|
"loss": 0.0333, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.6348090887795736, |
|
"grad_norm": 0.1499403417110443, |
|
"learning_rate": 8.723091006582389e-05, |
|
"loss": 0.0559, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.6371515577418599, |
|
"grad_norm": 0.36928892135620117, |
|
"learning_rate": 8.71203391311725e-05, |
|
"loss": 0.0763, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.6394940267041461, |
|
"grad_norm": 0.5727768540382385, |
|
"learning_rate": 8.700936225443959e-05, |
|
"loss": 0.0527, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.6418364956664324, |
|
"grad_norm": 0.30735543370246887, |
|
"learning_rate": 8.689798064925049e-05, |
|
"loss": 0.0585, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.6441789646287187, |
|
"grad_norm": 0.3882769048213959, |
|
"learning_rate": 8.678619553365659e-05, |
|
"loss": 0.0491, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.6465214335910049, |
|
"grad_norm": 0.365843802690506, |
|
"learning_rate": 8.6674008130122e-05, |
|
"loss": 0.0397, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.6488639025532912, |
|
"grad_norm": 0.21451324224472046, |
|
"learning_rate": 8.656141966551019e-05, |
|
"loss": 0.0365, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.6512063715155774, |
|
"grad_norm": 0.1609046310186386, |
|
"learning_rate": 8.644843137107059e-05, |
|
"loss": 0.039, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.6535488404778637, |
|
"grad_norm": 0.7074998021125793, |
|
"learning_rate": 8.633504448242505e-05, |
|
"loss": 0.0591, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.65589130944015, |
|
"grad_norm": 0.21024738252162933, |
|
"learning_rate": 8.622126023955446e-05, |
|
"loss": 0.0488, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.6582337784024361, |
|
"grad_norm": 0.3021513819694519, |
|
"learning_rate": 8.610707988678503e-05, |
|
"loss": 0.04, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.6605762473647224, |
|
"grad_norm": 0.19868189096450806, |
|
"learning_rate": 8.599250467277483e-05, |
|
"loss": 0.0319, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.6629187163270087, |
|
"grad_norm": 0.15607990324497223, |
|
"learning_rate": 8.587753585050004e-05, |
|
"loss": 0.036, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.6652611852892949, |
|
"grad_norm": 0.3136105239391327, |
|
"learning_rate": 8.576217467724128e-05, |
|
"loss": 0.0752, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.6676036542515812, |
|
"grad_norm": 0.21903324127197266, |
|
"learning_rate": 8.564642241456986e-05, |
|
"loss": 0.0416, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.6699461232138674, |
|
"grad_norm": 0.5193045735359192, |
|
"learning_rate": 8.553028032833397e-05, |
|
"loss": 0.0386, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.6722885921761537, |
|
"grad_norm": 0.5539060235023499, |
|
"learning_rate": 8.541374968864487e-05, |
|
"loss": 0.0439, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.67463106113844, |
|
"grad_norm": 0.2819710969924927, |
|
"learning_rate": 8.529683176986295e-05, |
|
"loss": 0.0541, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.6769735301007261, |
|
"grad_norm": 0.1039167121052742, |
|
"learning_rate": 8.517952785058385e-05, |
|
"loss": 0.039, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.6793159990630124, |
|
"grad_norm": 0.062352605164051056, |
|
"learning_rate": 8.506183921362443e-05, |
|
"loss": 0.0401, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.6816584680252986, |
|
"grad_norm": 0.5535932183265686, |
|
"learning_rate": 8.494376714600878e-05, |
|
"loss": 0.0505, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.6840009369875849, |
|
"grad_norm": 0.37601238489151, |
|
"learning_rate": 8.482531293895412e-05, |
|
"loss": 0.0391, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.6863434059498712, |
|
"grad_norm": 0.06856988370418549, |
|
"learning_rate": 8.470647788785665e-05, |
|
"loss": 0.0389, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.6886858749121574, |
|
"grad_norm": 0.5693712830543518, |
|
"learning_rate": 8.458726329227747e-05, |
|
"loss": 0.0495, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.6910283438744437, |
|
"grad_norm": 0.14418154954910278, |
|
"learning_rate": 8.44676704559283e-05, |
|
"loss": 0.0405, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.69337081283673, |
|
"grad_norm": 0.11880888044834137, |
|
"learning_rate": 8.434770068665723e-05, |
|
"loss": 0.0362, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.6957132817990161, |
|
"grad_norm": 0.6350199580192566, |
|
"learning_rate": 8.422735529643444e-05, |
|
"loss": 0.0607, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.6980557507613024, |
|
"grad_norm": 0.19949962198734283, |
|
"learning_rate": 8.410663560133784e-05, |
|
"loss": 0.0346, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.7003982197235886, |
|
"grad_norm": 0.19905024766921997, |
|
"learning_rate": 8.398554292153866e-05, |
|
"loss": 0.0455, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.7027406886858749, |
|
"grad_norm": 0.12724433839321136, |
|
"learning_rate": 8.386407858128706e-05, |
|
"loss": 0.0312, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7050831576481612, |
|
"grad_norm": 0.6818522214889526, |
|
"learning_rate": 8.37422439088976e-05, |
|
"loss": 0.0477, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.7074256266104474, |
|
"grad_norm": 0.14397919178009033, |
|
"learning_rate": 8.362004023673474e-05, |
|
"loss": 0.054, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.7097680955727337, |
|
"grad_norm": 0.1597958207130432, |
|
"learning_rate": 8.349746890119826e-05, |
|
"loss": 0.0475, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.7121105645350199, |
|
"grad_norm": 0.2985258102416992, |
|
"learning_rate": 8.337453124270863e-05, |
|
"loss": 0.0276, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.7144530334973062, |
|
"grad_norm": 0.17043350636959076, |
|
"learning_rate": 8.32512286056924e-05, |
|
"loss": 0.0337, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.7167955024595924, |
|
"grad_norm": 0.390009343624115, |
|
"learning_rate": 8.31275623385675e-05, |
|
"loss": 0.0277, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.7191379714218786, |
|
"grad_norm": 0.20475880801677704, |
|
"learning_rate": 8.300353379372834e-05, |
|
"loss": 0.0691, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.7214804403841649, |
|
"grad_norm": 0.11685507744550705, |
|
"learning_rate": 8.287914432753123e-05, |
|
"loss": 0.0411, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.7238229093464511, |
|
"grad_norm": 0.531944990158081, |
|
"learning_rate": 8.275439530027948e-05, |
|
"loss": 0.0511, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.7261653783087374, |
|
"grad_norm": 0.05079588294029236, |
|
"learning_rate": 8.262928807620843e-05, |
|
"loss": 0.0664, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.7285078472710237, |
|
"grad_norm": 0.3010249435901642, |
|
"learning_rate": 8.250382402347065e-05, |
|
"loss": 0.0565, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.7308503162333099, |
|
"grad_norm": 0.2115558385848999, |
|
"learning_rate": 8.237800451412095e-05, |
|
"loss": 0.0615, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.7331927851955962, |
|
"grad_norm": 0.3865530490875244, |
|
"learning_rate": 8.225183092410128e-05, |
|
"loss": 0.0349, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.7355352541578825, |
|
"grad_norm": 0.07815901935100555, |
|
"learning_rate": 8.212530463322583e-05, |
|
"loss": 0.036, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.7378777231201686, |
|
"grad_norm": 0.11009709537029266, |
|
"learning_rate": 8.199842702516583e-05, |
|
"loss": 0.0386, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.7402201920824549, |
|
"grad_norm": 0.12392786890268326, |
|
"learning_rate": 8.18711994874345e-05, |
|
"loss": 0.0396, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.7425626610447411, |
|
"grad_norm": 0.16354168951511383, |
|
"learning_rate": 8.174362341137177e-05, |
|
"loss": 0.0446, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.7449051300070274, |
|
"grad_norm": 0.2223191112279892, |
|
"learning_rate": 8.161570019212921e-05, |
|
"loss": 0.0326, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.7472475989693137, |
|
"grad_norm": 0.176427960395813, |
|
"learning_rate": 8.148743122865463e-05, |
|
"loss": 0.0235, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.7495900679315999, |
|
"grad_norm": 0.19706971943378448, |
|
"learning_rate": 8.135881792367686e-05, |
|
"loss": 0.0417, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.7519325368938862, |
|
"grad_norm": 0.08818463236093521, |
|
"learning_rate": 8.12298616836904e-05, |
|
"loss": 0.0463, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.7542750058561724, |
|
"grad_norm": 0.08389343321323395, |
|
"learning_rate": 8.110056391894005e-05, |
|
"loss": 0.0259, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.7566174748184586, |
|
"grad_norm": 0.13730217516422272, |
|
"learning_rate": 8.097092604340542e-05, |
|
"loss": 0.0394, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.7589599437807449, |
|
"grad_norm": 0.48324722051620483, |
|
"learning_rate": 8.084094947478556e-05, |
|
"loss": 0.0488, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.7613024127430311, |
|
"grad_norm": 0.15898984670639038, |
|
"learning_rate": 8.07106356344834e-05, |
|
"loss": 0.0402, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.7636448817053174, |
|
"grad_norm": 0.19997884333133698, |
|
"learning_rate": 8.057998594759022e-05, |
|
"loss": 0.0406, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.7659873506676037, |
|
"grad_norm": 0.06215028837323189, |
|
"learning_rate": 8.044900184287007e-05, |
|
"loss": 0.0577, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.7683298196298899, |
|
"grad_norm": 0.28326717019081116, |
|
"learning_rate": 8.031768475274413e-05, |
|
"loss": 0.057, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.7706722885921762, |
|
"grad_norm": 0.29579654335975647, |
|
"learning_rate": 8.018603611327504e-05, |
|
"loss": 0.0563, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.7730147575544624, |
|
"grad_norm": 0.5313428044319153, |
|
"learning_rate": 8.005405736415126e-05, |
|
"loss": 0.0748, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.7753572265167487, |
|
"grad_norm": 0.45142146944999695, |
|
"learning_rate": 7.992174994867123e-05, |
|
"loss": 0.0344, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.777699695479035, |
|
"grad_norm": 0.22848837077617645, |
|
"learning_rate": 7.978911531372765e-05, |
|
"loss": 0.0367, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.7800421644413211, |
|
"grad_norm": 0.07316577434539795, |
|
"learning_rate": 7.965615490979163e-05, |
|
"loss": 0.0332, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.7823846334036074, |
|
"grad_norm": 0.08522647619247437, |
|
"learning_rate": 7.952287019089685e-05, |
|
"loss": 0.0313, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.7847271023658936, |
|
"grad_norm": 0.2560670077800751, |
|
"learning_rate": 7.938926261462366e-05, |
|
"loss": 0.0753, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.7870695713281799, |
|
"grad_norm": 0.2529207468032837, |
|
"learning_rate": 7.925533364208309e-05, |
|
"loss": 0.0584, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.7894120402904662, |
|
"grad_norm": 0.20108440518379211, |
|
"learning_rate": 7.912108473790092e-05, |
|
"loss": 0.0443, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.7917545092527524, |
|
"grad_norm": 0.09312764555215836, |
|
"learning_rate": 7.898651737020166e-05, |
|
"loss": 0.0529, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.7940969782150387, |
|
"grad_norm": 0.08973310142755508, |
|
"learning_rate": 7.88516330105925e-05, |
|
"loss": 0.0313, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.796439447177325, |
|
"grad_norm": 0.2917576730251312, |
|
"learning_rate": 7.871643313414718e-05, |
|
"loss": 0.0699, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.7987819161396111, |
|
"grad_norm": 0.3426614999771118, |
|
"learning_rate": 7.858091921938988e-05, |
|
"loss": 0.0554, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.8011243851018974, |
|
"grad_norm": 0.10231604427099228, |
|
"learning_rate": 7.844509274827907e-05, |
|
"loss": 0.0469, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.8034668540641836, |
|
"grad_norm": 0.36295169591903687, |
|
"learning_rate": 7.830895520619128e-05, |
|
"loss": 0.0489, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.8058093230264699, |
|
"grad_norm": 0.23017369210720062, |
|
"learning_rate": 7.817250808190483e-05, |
|
"loss": 0.0407, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.8081517919887562, |
|
"grad_norm": 0.2438231259584427, |
|
"learning_rate": 7.803575286758364e-05, |
|
"loss": 0.0542, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.8104942609510424, |
|
"grad_norm": 0.28502318263053894, |
|
"learning_rate": 7.789869105876083e-05, |
|
"loss": 0.0433, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.8128367299133287, |
|
"grad_norm": 0.7063993215560913, |
|
"learning_rate": 7.776132415432234e-05, |
|
"loss": 0.0687, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.8151791988756149, |
|
"grad_norm": 0.3574845492839813, |
|
"learning_rate": 7.762365365649067e-05, |
|
"loss": 0.0283, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.8175216678379011, |
|
"grad_norm": 0.1527651846408844, |
|
"learning_rate": 7.748568107080832e-05, |
|
"loss": 0.0502, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.8198641368001874, |
|
"grad_norm": 0.20111270248889923, |
|
"learning_rate": 7.734740790612136e-05, |
|
"loss": 0.0526, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.8222066057624736, |
|
"grad_norm": 0.5221764445304871, |
|
"learning_rate": 7.720883567456298e-05, |
|
"loss": 0.0385, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.8245490747247599, |
|
"grad_norm": 0.11450177431106567, |
|
"learning_rate": 7.70699658915369e-05, |
|
"loss": 0.0495, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.8268915436870461, |
|
"grad_norm": 0.2669161558151245, |
|
"learning_rate": 7.693080007570084e-05, |
|
"loss": 0.0419, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.8292340126493324, |
|
"grad_norm": 0.4859974682331085, |
|
"learning_rate": 7.679133974894983e-05, |
|
"loss": 0.0454, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.8315764816116187, |
|
"grad_norm": 0.13351887464523315, |
|
"learning_rate": 7.66515864363997e-05, |
|
"loss": 0.0401, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.8339189505739049, |
|
"grad_norm": 0.3376217484474182, |
|
"learning_rate": 7.651154166637025e-05, |
|
"loss": 0.0372, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.8362614195361912, |
|
"grad_norm": 0.4906126856803894, |
|
"learning_rate": 7.637120697036866e-05, |
|
"loss": 0.0444, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.8386038884984774, |
|
"grad_norm": 0.1525869518518448, |
|
"learning_rate": 7.623058388307269e-05, |
|
"loss": 0.0411, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.8409463574607636, |
|
"grad_norm": 0.10655678063631058, |
|
"learning_rate": 7.608967394231387e-05, |
|
"loss": 0.0322, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.8432888264230499, |
|
"grad_norm": 0.6658011674880981, |
|
"learning_rate": 7.594847868906076e-05, |
|
"loss": 0.0736, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.8456312953853361, |
|
"grad_norm": 0.2985578775405884, |
|
"learning_rate": 7.580699966740201e-05, |
|
"loss": 0.0296, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.8479737643476224, |
|
"grad_norm": 0.08989045768976212, |
|
"learning_rate": 7.566523842452958e-05, |
|
"loss": 0.0412, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.8503162333099087, |
|
"grad_norm": 0.37455546855926514, |
|
"learning_rate": 7.552319651072164e-05, |
|
"loss": 0.0473, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.8526587022721949, |
|
"grad_norm": 0.19339019060134888, |
|
"learning_rate": 7.538087547932585e-05, |
|
"loss": 0.0475, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.8550011712344812, |
|
"grad_norm": 0.22095589339733124, |
|
"learning_rate": 7.52382768867422e-05, |
|
"loss": 0.0287, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.8573436401967673, |
|
"grad_norm": 0.39905375242233276, |
|
"learning_rate": 7.509540229240601e-05, |
|
"loss": 0.0418, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.8596861091590536, |
|
"grad_norm": 0.1556907296180725, |
|
"learning_rate": 7.495225325877103e-05, |
|
"loss": 0.0462, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.8620285781213399, |
|
"grad_norm": 0.43170592188835144, |
|
"learning_rate": 7.480883135129211e-05, |
|
"loss": 0.0453, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.8643710470836261, |
|
"grad_norm": 0.09220433235168457, |
|
"learning_rate": 7.466513813840825e-05, |
|
"loss": 0.0414, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.8667135160459124, |
|
"grad_norm": 0.09303878992795944, |
|
"learning_rate": 7.452117519152542e-05, |
|
"loss": 0.0412, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.8690559850081987, |
|
"grad_norm": 0.456315279006958, |
|
"learning_rate": 7.437694408499933e-05, |
|
"loss": 0.0429, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.8713984539704849, |
|
"grad_norm": 0.0672278180718422, |
|
"learning_rate": 7.423244639611826e-05, |
|
"loss": 0.0492, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.8737409229327712, |
|
"grad_norm": 0.11052095890045166, |
|
"learning_rate": 7.408768370508576e-05, |
|
"loss": 0.0404, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.8760833918950574, |
|
"grad_norm": 0.20042133331298828, |
|
"learning_rate": 7.394265759500348e-05, |
|
"loss": 0.0597, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.8784258608573436, |
|
"grad_norm": 0.3536411225795746, |
|
"learning_rate": 7.379736965185368e-05, |
|
"loss": 0.0212, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.8807683298196299, |
|
"grad_norm": 0.28125354647636414, |
|
"learning_rate": 7.365182146448205e-05, |
|
"loss": 0.052, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.8831107987819161, |
|
"grad_norm": 0.12258744984865189, |
|
"learning_rate": 7.350601462458024e-05, |
|
"loss": 0.02, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.8854532677442024, |
|
"grad_norm": 0.5056569576263428, |
|
"learning_rate": 7.335995072666848e-05, |
|
"loss": 0.035, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.8877957367064886, |
|
"grad_norm": 0.2552855610847473, |
|
"learning_rate": 7.32136313680782e-05, |
|
"loss": 0.0421, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.8901382056687749, |
|
"grad_norm": 0.05761013180017471, |
|
"learning_rate": 7.30670581489344e-05, |
|
"loss": 0.0414, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.8924806746310612, |
|
"grad_norm": 0.9745859503746033, |
|
"learning_rate": 7.292023267213835e-05, |
|
"loss": 0.0725, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.8948231435933474, |
|
"grad_norm": 0.2608197033405304, |
|
"learning_rate": 7.277315654334997e-05, |
|
"loss": 0.0405, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.8971656125556337, |
|
"grad_norm": 0.3153429329395294, |
|
"learning_rate": 7.262583137097018e-05, |
|
"loss": 0.0407, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.8995080815179198, |
|
"grad_norm": 0.5415343642234802, |
|
"learning_rate": 7.247825876612353e-05, |
|
"loss": 0.0389, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.9018505504802061, |
|
"grad_norm": 0.4772924482822418, |
|
"learning_rate": 7.233044034264034e-05, |
|
"loss": 0.055, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.9041930194424924, |
|
"grad_norm": 0.41308316588401794, |
|
"learning_rate": 7.218237771703921e-05, |
|
"loss": 0.0578, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.9065354884047786, |
|
"grad_norm": 0.0859963595867157, |
|
"learning_rate": 7.203407250850928e-05, |
|
"loss": 0.0328, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.9088779573670649, |
|
"grad_norm": 0.4168371856212616, |
|
"learning_rate": 7.188552633889259e-05, |
|
"loss": 0.0493, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.9112204263293512, |
|
"grad_norm": 0.42193326354026794, |
|
"learning_rate": 7.173674083266624e-05, |
|
"loss": 0.052, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.9135628952916374, |
|
"grad_norm": 0.11540161073207855, |
|
"learning_rate": 7.158771761692464e-05, |
|
"loss": 0.0616, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.9159053642539237, |
|
"grad_norm": 0.1789163500070572, |
|
"learning_rate": 7.143845832136188e-05, |
|
"loss": 0.0315, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.9182478332162098, |
|
"grad_norm": 0.2873396873474121, |
|
"learning_rate": 7.128896457825364e-05, |
|
"loss": 0.0577, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.9205903021784961, |
|
"grad_norm": 0.035885997116565704, |
|
"learning_rate": 7.113923802243957e-05, |
|
"loss": 0.0462, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.9229327711407824, |
|
"grad_norm": 0.380929172039032, |
|
"learning_rate": 7.09892802913053e-05, |
|
"loss": 0.0285, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.9252752401030686, |
|
"grad_norm": 0.21406327188014984, |
|
"learning_rate": 7.083909302476453e-05, |
|
"loss": 0.0255, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.9276177090653549, |
|
"grad_norm": 0.04998482018709183, |
|
"learning_rate": 7.068867786524116e-05, |
|
"loss": 0.0285, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.9299601780276411, |
|
"grad_norm": 0.19604696333408356, |
|
"learning_rate": 7.053803645765128e-05, |
|
"loss": 0.0345, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.9323026469899274, |
|
"grad_norm": 0.6424615979194641, |
|
"learning_rate": 7.038717044938519e-05, |
|
"loss": 0.0411, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.9346451159522137, |
|
"grad_norm": 0.0754154697060585, |
|
"learning_rate": 7.023608149028937e-05, |
|
"loss": 0.0243, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.9369875849144998, |
|
"grad_norm": 0.26757097244262695, |
|
"learning_rate": 7.008477123264848e-05, |
|
"loss": 0.0414, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9393300538767861, |
|
"grad_norm": 0.14239585399627686, |
|
"learning_rate": 6.993324133116726e-05, |
|
"loss": 0.0259, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.9416725228390724, |
|
"grad_norm": 0.12988215684890747, |
|
"learning_rate": 6.978149344295242e-05, |
|
"loss": 0.0279, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.9440149918013586, |
|
"grad_norm": 0.3678188920021057, |
|
"learning_rate": 6.962952922749457e-05, |
|
"loss": 0.0353, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.9463574607636449, |
|
"grad_norm": 0.6559092402458191, |
|
"learning_rate": 6.947735034665002e-05, |
|
"loss": 0.0558, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.9486999297259311, |
|
"grad_norm": 0.607363760471344, |
|
"learning_rate": 6.932495846462261e-05, |
|
"loss": 0.0459, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.9510423986882174, |
|
"grad_norm": 0.22406215965747833, |
|
"learning_rate": 6.917235524794558e-05, |
|
"loss": 0.0412, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.9533848676505037, |
|
"grad_norm": 0.2519318461418152, |
|
"learning_rate": 6.901954236546323e-05, |
|
"loss": 0.0355, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.9557273366127899, |
|
"grad_norm": 0.40484338998794556, |
|
"learning_rate": 6.886652148831279e-05, |
|
"loss": 0.0446, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.9580698055750761, |
|
"grad_norm": 0.36861318349838257, |
|
"learning_rate": 6.871329428990602e-05, |
|
"loss": 0.0324, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.9604122745373623, |
|
"grad_norm": 0.15483994781970978, |
|
"learning_rate": 6.855986244591104e-05, |
|
"loss": 0.0265, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.9627547434996486, |
|
"grad_norm": 0.12822240591049194, |
|
"learning_rate": 6.840622763423391e-05, |
|
"loss": 0.0251, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.9650972124619349, |
|
"grad_norm": 0.2436823546886444, |
|
"learning_rate": 6.825239153500029e-05, |
|
"loss": 0.0354, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.9674396814242211, |
|
"grad_norm": 0.11992768943309784, |
|
"learning_rate": 6.809835583053715e-05, |
|
"loss": 0.0355, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.9697821503865074, |
|
"grad_norm": 0.05282627418637276, |
|
"learning_rate": 6.794412220535426e-05, |
|
"loss": 0.0325, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.9721246193487937, |
|
"grad_norm": 0.1702210009098053, |
|
"learning_rate": 6.778969234612584e-05, |
|
"loss": 0.0421, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.9744670883110799, |
|
"grad_norm": 0.30918455123901367, |
|
"learning_rate": 6.763506794167208e-05, |
|
"loss": 0.0306, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.9768095572733662, |
|
"grad_norm": 0.18471957743167877, |
|
"learning_rate": 6.748025068294067e-05, |
|
"loss": 0.026, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.9791520262356523, |
|
"grad_norm": 0.2867111265659332, |
|
"learning_rate": 6.732524226298841e-05, |
|
"loss": 0.0368, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.9814944951979386, |
|
"grad_norm": 0.5615723729133606, |
|
"learning_rate": 6.71700443769625e-05, |
|
"loss": 0.0374, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.9838369641602249, |
|
"grad_norm": 0.06628378480672836, |
|
"learning_rate": 6.701465872208216e-05, |
|
"loss": 0.0432, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.9861794331225111, |
|
"grad_norm": 0.24212607741355896, |
|
"learning_rate": 6.685908699762002e-05, |
|
"loss": 0.0446, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.9885219020847974, |
|
"grad_norm": 0.1411833018064499, |
|
"learning_rate": 6.670333090488356e-05, |
|
"loss": 0.0281, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.9908643710470836, |
|
"grad_norm": 0.4957182705402374, |
|
"learning_rate": 6.654739214719641e-05, |
|
"loss": 0.0385, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.9932068400093699, |
|
"grad_norm": 0.2773032486438751, |
|
"learning_rate": 6.639127242987988e-05, |
|
"loss": 0.0351, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.9955493089716562, |
|
"grad_norm": 0.6347845196723938, |
|
"learning_rate": 6.623497346023418e-05, |
|
"loss": 0.0519, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.9978917779339423, |
|
"grad_norm": 0.39392927289009094, |
|
"learning_rate": 6.607849694751977e-05, |
|
"loss": 0.0415, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 1.0002342468962286, |
|
"grad_norm": 0.12185105681419373, |
|
"learning_rate": 6.592184460293877e-05, |
|
"loss": 0.0413, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 1.0025767158585148, |
|
"grad_norm": 0.4016129970550537, |
|
"learning_rate": 6.576501813961609e-05, |
|
"loss": 0.0473, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 1.0049191848208012, |
|
"grad_norm": 0.10202305018901825, |
|
"learning_rate": 6.56080192725808e-05, |
|
"loss": 0.0476, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 1.0072616537830874, |
|
"grad_norm": 0.08643211424350739, |
|
"learning_rate": 6.545084971874738e-05, |
|
"loss": 0.0363, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.0096041227453736, |
|
"grad_norm": 0.4279628396034241, |
|
"learning_rate": 6.529351119689688e-05, |
|
"loss": 0.0343, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 1.0119465917076598, |
|
"grad_norm": 0.0435931533575058, |
|
"learning_rate": 6.513600542765817e-05, |
|
"loss": 0.0363, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 1.0142890606699462, |
|
"grad_norm": 0.11314094811677933, |
|
"learning_rate": 6.497833413348909e-05, |
|
"loss": 0.0409, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 1.0166315296322324, |
|
"grad_norm": 0.049418941140174866, |
|
"learning_rate": 6.48204990386577e-05, |
|
"loss": 0.027, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 1.0189739985945185, |
|
"grad_norm": 0.0937579795718193, |
|
"learning_rate": 6.466250186922325e-05, |
|
"loss": 0.0386, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.021316467556805, |
|
"grad_norm": 0.17256158590316772, |
|
"learning_rate": 6.450434435301751e-05, |
|
"loss": 0.0283, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 1.0236589365190911, |
|
"grad_norm": 0.41623151302337646, |
|
"learning_rate": 6.43460282196257e-05, |
|
"loss": 0.0309, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 1.0260014054813773, |
|
"grad_norm": 0.25574249029159546, |
|
"learning_rate": 6.418755520036775e-05, |
|
"loss": 0.017, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 1.0283438744436637, |
|
"grad_norm": 0.12465788424015045, |
|
"learning_rate": 6.402892702827916e-05, |
|
"loss": 0.028, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 1.03068634340595, |
|
"grad_norm": 0.2367735058069229, |
|
"learning_rate": 6.387014543809223e-05, |
|
"loss": 0.0288, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.033028812368236, |
|
"grad_norm": 0.15218676626682281, |
|
"learning_rate": 6.371121216621698e-05, |
|
"loss": 0.0414, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 1.0353712813305225, |
|
"grad_norm": 0.09345823526382446, |
|
"learning_rate": 6.355212895072223e-05, |
|
"loss": 0.0348, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 1.0377137502928087, |
|
"grad_norm": 0.25038620829582214, |
|
"learning_rate": 6.339289753131649e-05, |
|
"loss": 0.0472, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 1.0400562192550948, |
|
"grad_norm": 0.5955792665481567, |
|
"learning_rate": 6.323351964932908e-05, |
|
"loss": 0.0612, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 1.042398688217381, |
|
"grad_norm": 0.10471931844949722, |
|
"learning_rate": 6.307399704769099e-05, |
|
"loss": 0.0319, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.0447411571796674, |
|
"grad_norm": 0.3728072941303253, |
|
"learning_rate": 6.291433147091583e-05, |
|
"loss": 0.0346, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 1.0470836261419536, |
|
"grad_norm": 0.13940206170082092, |
|
"learning_rate": 6.275452466508077e-05, |
|
"loss": 0.0315, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 1.0494260951042398, |
|
"grad_norm": 0.24892286956310272, |
|
"learning_rate": 6.259457837780742e-05, |
|
"loss": 0.0271, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 1.0517685640665262, |
|
"grad_norm": 0.09227164089679718, |
|
"learning_rate": 6.243449435824276e-05, |
|
"loss": 0.035, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 1.0541110330288124, |
|
"grad_norm": 0.4062785804271698, |
|
"learning_rate": 6.227427435703997e-05, |
|
"loss": 0.0381, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.0564535019910986, |
|
"grad_norm": 0.10490421950817108, |
|
"learning_rate": 6.211392012633932e-05, |
|
"loss": 0.0424, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 1.058795970953385, |
|
"grad_norm": 0.08822830021381378, |
|
"learning_rate": 6.195343341974899e-05, |
|
"loss": 0.0484, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 1.0611384399156711, |
|
"grad_norm": 0.22914232313632965, |
|
"learning_rate": 6.179281599232591e-05, |
|
"loss": 0.0388, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 1.0634809088779573, |
|
"grad_norm": 0.6712221503257751, |
|
"learning_rate": 6.163206960055651e-05, |
|
"loss": 0.0853, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 1.0658233778402435, |
|
"grad_norm": 0.2438327521085739, |
|
"learning_rate": 6.147119600233758e-05, |
|
"loss": 0.0177, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.06816584680253, |
|
"grad_norm": 0.45352616906166077, |
|
"learning_rate": 6.131019695695702e-05, |
|
"loss": 0.0798, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 1.070508315764816, |
|
"grad_norm": 0.17237244546413422, |
|
"learning_rate": 6.11490742250746e-05, |
|
"loss": 0.037, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 1.0728507847271023, |
|
"grad_norm": 0.7011030316352844, |
|
"learning_rate": 6.0987829568702656e-05, |
|
"loss": 0.0549, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 1.0751932536893887, |
|
"grad_norm": 0.14807315170764923, |
|
"learning_rate": 6.0826464751186994e-05, |
|
"loss": 0.0483, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 1.0775357226516749, |
|
"grad_norm": 0.42932969331741333, |
|
"learning_rate": 6.066498153718735e-05, |
|
"loss": 0.0388, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.079878191613961, |
|
"grad_norm": 0.13377119600772858, |
|
"learning_rate": 6.05033816926583e-05, |
|
"loss": 0.0464, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 1.0822206605762474, |
|
"grad_norm": 0.13043726980686188, |
|
"learning_rate": 6.034166698482984e-05, |
|
"loss": 0.0234, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 1.0845631295385336, |
|
"grad_norm": 0.23946554958820343, |
|
"learning_rate": 6.017983918218812e-05, |
|
"loss": 0.0415, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 1.0869055985008198, |
|
"grad_norm": 0.11139467358589172, |
|
"learning_rate": 6.001790005445607e-05, |
|
"loss": 0.0397, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 1.0892480674631062, |
|
"grad_norm": 0.1447746455669403, |
|
"learning_rate": 5.985585137257401e-05, |
|
"loss": 0.0293, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.0915905364253924, |
|
"grad_norm": 0.45925086736679077, |
|
"learning_rate": 5.969369490868042e-05, |
|
"loss": 0.03, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 1.0939330053876786, |
|
"grad_norm": 0.2177567183971405, |
|
"learning_rate": 5.953143243609235e-05, |
|
"loss": 0.042, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 1.096275474349965, |
|
"grad_norm": 0.20075875520706177, |
|
"learning_rate": 5.9369065729286245e-05, |
|
"loss": 0.0384, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 1.0986179433122512, |
|
"grad_norm": 0.16894571483135223, |
|
"learning_rate": 5.9206596563878357e-05, |
|
"loss": 0.0308, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 1.1009604122745373, |
|
"grad_norm": 0.09761305898427963, |
|
"learning_rate": 5.90440267166055e-05, |
|
"loss": 0.0244, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.1033028812368235, |
|
"grad_norm": 0.04163440316915512, |
|
"learning_rate": 5.888135796530544e-05, |
|
"loss": 0.0191, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 1.10564535019911, |
|
"grad_norm": 0.27570199966430664, |
|
"learning_rate": 5.871859208889759e-05, |
|
"loss": 0.0222, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 1.107987819161396, |
|
"grad_norm": 0.2948501706123352, |
|
"learning_rate": 5.85557308673635e-05, |
|
"loss": 0.0442, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 1.1103302881236823, |
|
"grad_norm": 0.26524093747138977, |
|
"learning_rate": 5.8392776081727385e-05, |
|
"loss": 0.0347, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 1.1126727570859687, |
|
"grad_norm": 0.26801493763923645, |
|
"learning_rate": 5.8229729514036705e-05, |
|
"loss": 0.0299, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.1150152260482549, |
|
"grad_norm": 0.0498003289103508, |
|
"learning_rate": 5.8066592947342555e-05, |
|
"loss": 0.0289, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 1.117357695010541, |
|
"grad_norm": 0.2827109694480896, |
|
"learning_rate": 5.7903368165680327e-05, |
|
"loss": 0.0328, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 1.1197001639728275, |
|
"grad_norm": 0.18607333302497864, |
|
"learning_rate": 5.7740056954050084e-05, |
|
"loss": 0.0277, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 1.1220426329351136, |
|
"grad_norm": 0.10899386554956436, |
|
"learning_rate": 5.757666109839702e-05, |
|
"loss": 0.0397, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 1.1243851018973998, |
|
"grad_norm": 0.9352733492851257, |
|
"learning_rate": 5.74131823855921e-05, |
|
"loss": 0.0801, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.126727570859686, |
|
"grad_norm": 0.15164723992347717, |
|
"learning_rate": 5.72496226034123e-05, |
|
"loss": 0.0572, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 1.1290700398219724, |
|
"grad_norm": 0.06457802653312683, |
|
"learning_rate": 5.7085983540521216e-05, |
|
"loss": 0.041, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 1.1314125087842586, |
|
"grad_norm": 0.13067546486854553, |
|
"learning_rate": 5.692226698644938e-05, |
|
"loss": 0.0345, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 1.1337549777465448, |
|
"grad_norm": 0.4330101013183594, |
|
"learning_rate": 5.675847473157485e-05, |
|
"loss": 0.0436, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 1.1360974467088312, |
|
"grad_norm": 0.41848742961883545, |
|
"learning_rate": 5.6594608567103456e-05, |
|
"loss": 0.0216, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.1384399156711174, |
|
"grad_norm": 0.13505397737026215, |
|
"learning_rate": 5.6430670285049314e-05, |
|
"loss": 0.0305, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 1.1407823846334035, |
|
"grad_norm": 0.4569176435470581, |
|
"learning_rate": 5.6266661678215216e-05, |
|
"loss": 0.0324, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 1.14312485359569, |
|
"grad_norm": 0.4705914556980133, |
|
"learning_rate": 5.6102584540173006e-05, |
|
"loss": 0.0478, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 1.1454673225579761, |
|
"grad_norm": 0.276143342256546, |
|
"learning_rate": 5.5938440665244006e-05, |
|
"loss": 0.0578, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 1.1478097915202623, |
|
"grad_norm": 0.3393331468105316, |
|
"learning_rate": 5.577423184847932e-05, |
|
"loss": 0.0507, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.1501522604825487, |
|
"grad_norm": 0.18119889497756958, |
|
"learning_rate": 5.560995988564023e-05, |
|
"loss": 0.0197, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 1.1524947294448349, |
|
"grad_norm": 0.0739196389913559, |
|
"learning_rate": 5.544562657317863e-05, |
|
"loss": 0.0297, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 1.154837198407121, |
|
"grad_norm": 0.22677703201770782, |
|
"learning_rate": 5.52812337082173e-05, |
|
"loss": 0.0407, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 1.1571796673694075, |
|
"grad_norm": 0.054532766342163086, |
|
"learning_rate": 5.511678308853026e-05, |
|
"loss": 0.0448, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 1.1595221363316937, |
|
"grad_norm": 0.45871463418006897, |
|
"learning_rate": 5.495227651252315e-05, |
|
"loss": 0.0316, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.1618646052939798, |
|
"grad_norm": 0.09669110924005508, |
|
"learning_rate": 5.478771577921351e-05, |
|
"loss": 0.0404, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 1.164207074256266, |
|
"grad_norm": 0.1810620278120041, |
|
"learning_rate": 5.462310268821118e-05, |
|
"loss": 0.0233, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 1.1665495432185524, |
|
"grad_norm": 0.10690245032310486, |
|
"learning_rate": 5.445843903969854e-05, |
|
"loss": 0.033, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 1.1688920121808386, |
|
"grad_norm": 0.3685993552207947, |
|
"learning_rate": 5.4293726634410855e-05, |
|
"loss": 0.0204, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 1.1712344811431248, |
|
"grad_norm": 0.17481215298175812, |
|
"learning_rate": 5.4128967273616625e-05, |
|
"loss": 0.0269, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.1735769501054112, |
|
"grad_norm": 0.6450178027153015, |
|
"learning_rate": 5.396416275909779e-05, |
|
"loss": 0.052, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 1.1759194190676974, |
|
"grad_norm": 0.0964297205209732, |
|
"learning_rate": 5.379931489313016e-05, |
|
"loss": 0.0299, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 1.1782618880299836, |
|
"grad_norm": 0.06013895943760872, |
|
"learning_rate": 5.363442547846356e-05, |
|
"loss": 0.0334, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 1.1806043569922697, |
|
"grad_norm": 0.032787106931209564, |
|
"learning_rate": 5.3469496318302204e-05, |
|
"loss": 0.0506, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 1.1829468259545561, |
|
"grad_norm": 0.3833360970020294, |
|
"learning_rate": 5.330452921628497e-05, |
|
"loss": 0.0331, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 1.1852892949168423, |
|
"grad_norm": 0.08078952878713608, |
|
"learning_rate": 5.313952597646568e-05, |
|
"loss": 0.0171, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 1.1876317638791285, |
|
"grad_norm": 0.09187212586402893, |
|
"learning_rate": 5.297448840329329e-05, |
|
"loss": 0.0195, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 1.189974232841415, |
|
"grad_norm": 0.2530211806297302, |
|
"learning_rate": 5.280941830159227e-05, |
|
"loss": 0.0219, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 1.192316701803701, |
|
"grad_norm": 0.059026945382356644, |
|
"learning_rate": 5.264431747654284e-05, |
|
"loss": 0.0362, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 1.1946591707659873, |
|
"grad_norm": 0.04210277274250984, |
|
"learning_rate": 5.247918773366112e-05, |
|
"loss": 0.0314, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.1970016397282737, |
|
"grad_norm": 0.4919138550758362, |
|
"learning_rate": 5.231403087877955e-05, |
|
"loss": 0.0335, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 1.1993441086905599, |
|
"grad_norm": 0.06546583771705627, |
|
"learning_rate": 5.214884871802703e-05, |
|
"loss": 0.0223, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 1.201686577652846, |
|
"grad_norm": 0.08152215927839279, |
|
"learning_rate": 5.198364305780922e-05, |
|
"loss": 0.0316, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 1.2040290466151324, |
|
"grad_norm": 0.2411283552646637, |
|
"learning_rate": 5.1818415704788725e-05, |
|
"loss": 0.0669, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 1.2063715155774186, |
|
"grad_norm": 0.49666517972946167, |
|
"learning_rate": 5.165316846586541e-05, |
|
"loss": 0.041, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 1.2087139845397048, |
|
"grad_norm": 0.08363020420074463, |
|
"learning_rate": 5.148790314815663e-05, |
|
"loss": 0.0209, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 1.2110564535019912, |
|
"grad_norm": 0.04317115619778633, |
|
"learning_rate": 5.132262155897739e-05, |
|
"loss": 0.0367, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 1.2133989224642774, |
|
"grad_norm": 0.1066800057888031, |
|
"learning_rate": 5.1157325505820694e-05, |
|
"loss": 0.0399, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 1.2157413914265636, |
|
"grad_norm": 0.17649437487125397, |
|
"learning_rate": 5.0992016796337686e-05, |
|
"loss": 0.0236, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 1.21808386038885, |
|
"grad_norm": 0.14966139197349548, |
|
"learning_rate": 5.0826697238317935e-05, |
|
"loss": 0.0195, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.2204263293511362, |
|
"grad_norm": 0.03593892604112625, |
|
"learning_rate": 5.066136863966963e-05, |
|
"loss": 0.0202, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 1.2227687983134223, |
|
"grad_norm": 0.46276217699050903, |
|
"learning_rate": 5.0496032808399815e-05, |
|
"loss": 0.0464, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 1.2251112672757085, |
|
"grad_norm": 0.21946477890014648, |
|
"learning_rate": 5.033069155259471e-05, |
|
"loss": 0.0301, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 1.227453736237995, |
|
"grad_norm": 0.08784784376621246, |
|
"learning_rate": 5.016534668039976e-05, |
|
"loss": 0.0316, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 1.229796205200281, |
|
"grad_norm": 0.1410629153251648, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0263, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.2321386741625673, |
|
"grad_norm": 0.07868409156799316, |
|
"learning_rate": 4.9834653319600246e-05, |
|
"loss": 0.0213, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 1.2344811431248537, |
|
"grad_norm": 0.215213343501091, |
|
"learning_rate": 4.96693084474053e-05, |
|
"loss": 0.0457, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 1.2368236120871399, |
|
"grad_norm": 0.16864515841007233, |
|
"learning_rate": 4.950396719160018e-05, |
|
"loss": 0.0336, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 1.239166081049426, |
|
"grad_norm": 0.0474487841129303, |
|
"learning_rate": 4.93386313603304e-05, |
|
"loss": 0.0227, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 1.2415085500117122, |
|
"grad_norm": 0.5898747444152832, |
|
"learning_rate": 4.917330276168208e-05, |
|
"loss": 0.0165, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.2438510189739986, |
|
"grad_norm": 0.4065062403678894, |
|
"learning_rate": 4.9007983203662326e-05, |
|
"loss": 0.0271, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 1.2461934879362848, |
|
"grad_norm": 0.19243858754634857, |
|
"learning_rate": 4.884267449417931e-05, |
|
"loss": 0.0222, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 1.248535956898571, |
|
"grad_norm": 0.14905819296836853, |
|
"learning_rate": 4.867737844102261e-05, |
|
"loss": 0.0183, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 1.2508784258608574, |
|
"grad_norm": 0.28917795419692993, |
|
"learning_rate": 4.851209685184338e-05, |
|
"loss": 0.0194, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 1.2532208948231436, |
|
"grad_norm": 0.3423207104206085, |
|
"learning_rate": 4.834683153413459e-05, |
|
"loss": 0.0281, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 1.2555633637854298, |
|
"grad_norm": 0.04684186726808548, |
|
"learning_rate": 4.818158429521129e-05, |
|
"loss": 0.0266, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 1.2579058327477162, |
|
"grad_norm": 0.27714163064956665, |
|
"learning_rate": 4.801635694219079e-05, |
|
"loss": 0.0468, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 1.2602483017100023, |
|
"grad_norm": 0.1844978630542755, |
|
"learning_rate": 4.785115128197298e-05, |
|
"loss": 0.0392, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 1.2625907706722885, |
|
"grad_norm": 0.36138930916786194, |
|
"learning_rate": 4.7685969121220456e-05, |
|
"loss": 0.029, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 1.264933239634575, |
|
"grad_norm": 0.3211914896965027, |
|
"learning_rate": 4.7520812266338885e-05, |
|
"loss": 0.0611, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.2672757085968611, |
|
"grad_norm": 0.5163668990135193, |
|
"learning_rate": 4.735568252345718e-05, |
|
"loss": 0.0481, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 1.2696181775591473, |
|
"grad_norm": 0.5117266178131104, |
|
"learning_rate": 4.7190581698407725e-05, |
|
"loss": 0.0326, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 1.2719606465214337, |
|
"grad_norm": 0.24475805461406708, |
|
"learning_rate": 4.702551159670672e-05, |
|
"loss": 0.0229, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 1.2743031154837199, |
|
"grad_norm": 0.07154544442892075, |
|
"learning_rate": 4.6860474023534335e-05, |
|
"loss": 0.042, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 1.276645584446006, |
|
"grad_norm": 0.28115877509117126, |
|
"learning_rate": 4.669547078371504e-05, |
|
"loss": 0.0249, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.2789880534082925, |
|
"grad_norm": 0.22904540598392487, |
|
"learning_rate": 4.65305036816978e-05, |
|
"loss": 0.0339, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 1.2813305223705787, |
|
"grad_norm": 0.11327308416366577, |
|
"learning_rate": 4.6365574521536445e-05, |
|
"loss": 0.0175, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 1.2836729913328648, |
|
"grad_norm": 0.1697210669517517, |
|
"learning_rate": 4.620068510686985e-05, |
|
"loss": 0.0362, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 1.286015460295151, |
|
"grad_norm": 0.08553613722324371, |
|
"learning_rate": 4.60358372409022e-05, |
|
"loss": 0.0159, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 1.2883579292574374, |
|
"grad_norm": 0.07890176773071289, |
|
"learning_rate": 4.5871032726383386e-05, |
|
"loss": 0.0288, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.2907003982197236, |
|
"grad_norm": 0.33075398206710815, |
|
"learning_rate": 4.570627336558915e-05, |
|
"loss": 0.0262, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 1.2930428671820098, |
|
"grad_norm": 0.09929897636175156, |
|
"learning_rate": 4.554156096030149e-05, |
|
"loss": 0.0231, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 1.295385336144296, |
|
"grad_norm": 0.1128670945763588, |
|
"learning_rate": 4.537689731178883e-05, |
|
"loss": 0.0201, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 1.2977278051065824, |
|
"grad_norm": 0.05418454855680466, |
|
"learning_rate": 4.5212284220786494e-05, |
|
"loss": 0.0404, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 1.3000702740688685, |
|
"grad_norm": 0.1747845560312271, |
|
"learning_rate": 4.504772348747687e-05, |
|
"loss": 0.0324, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 1.3024127430311547, |
|
"grad_norm": 0.6264855265617371, |
|
"learning_rate": 4.488321691146975e-05, |
|
"loss": 0.0607, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 1.3047552119934411, |
|
"grad_norm": 0.20012634992599487, |
|
"learning_rate": 4.471876629178273e-05, |
|
"loss": 0.0336, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 1.3070976809557273, |
|
"grad_norm": 0.07151951640844345, |
|
"learning_rate": 4.4554373426821374e-05, |
|
"loss": 0.0199, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 1.3094401499180135, |
|
"grad_norm": 0.09090318530797958, |
|
"learning_rate": 4.439004011435979e-05, |
|
"loss": 0.0263, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 1.3117826188803, |
|
"grad_norm": 0.09504502266645432, |
|
"learning_rate": 4.4225768151520694e-05, |
|
"loss": 0.038, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.314125087842586, |
|
"grad_norm": 0.19809271395206451, |
|
"learning_rate": 4.406155933475599e-05, |
|
"loss": 0.0376, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 1.3164675568048723, |
|
"grad_norm": 0.2558313012123108, |
|
"learning_rate": 4.3897415459827e-05, |
|
"loss": 0.043, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 1.3188100257671587, |
|
"grad_norm": 0.08637325465679169, |
|
"learning_rate": 4.373333832178478e-05, |
|
"loss": 0.0341, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 1.3211524947294448, |
|
"grad_norm": 0.06880134344100952, |
|
"learning_rate": 4.3569329714950704e-05, |
|
"loss": 0.0229, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 1.323494963691731, |
|
"grad_norm": 0.16358880698680878, |
|
"learning_rate": 4.3405391432896555e-05, |
|
"loss": 0.0387, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 1.3258374326540174, |
|
"grad_norm": 0.05642487108707428, |
|
"learning_rate": 4.324152526842517e-05, |
|
"loss": 0.0291, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 1.3281799016163036, |
|
"grad_norm": 0.13398276269435883, |
|
"learning_rate": 4.307773301355062e-05, |
|
"loss": 0.0449, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 1.3305223705785898, |
|
"grad_norm": 0.41730400919914246, |
|
"learning_rate": 4.291401645947879e-05, |
|
"loss": 0.0336, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 1.3328648395408762, |
|
"grad_norm": 0.1082252785563469, |
|
"learning_rate": 4.275037739658771e-05, |
|
"loss": 0.0159, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 1.3352073085031624, |
|
"grad_norm": 0.5044443607330322, |
|
"learning_rate": 4.2586817614407895e-05, |
|
"loss": 0.026, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.3375497774654486, |
|
"grad_norm": 0.19207948446273804, |
|
"learning_rate": 4.2423338901602985e-05, |
|
"loss": 0.0205, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 1.339892246427735, |
|
"grad_norm": 0.14319565892219543, |
|
"learning_rate": 4.2259943045949934e-05, |
|
"loss": 0.0174, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 1.3422347153900211, |
|
"grad_norm": 0.0638875886797905, |
|
"learning_rate": 4.209663183431969e-05, |
|
"loss": 0.0272, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 1.3445771843523073, |
|
"grad_norm": 0.5683619379997253, |
|
"learning_rate": 4.1933407052657456e-05, |
|
"loss": 0.0265, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 1.3469196533145935, |
|
"grad_norm": 0.1282253861427307, |
|
"learning_rate": 4.17702704859633e-05, |
|
"loss": 0.0285, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 1.3492621222768797, |
|
"grad_norm": 0.2435198575258255, |
|
"learning_rate": 4.160722391827262e-05, |
|
"loss": 0.0348, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 1.351604591239166, |
|
"grad_norm": 0.10652618855237961, |
|
"learning_rate": 4.14442691326365e-05, |
|
"loss": 0.0409, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 1.3539470602014523, |
|
"grad_norm": 0.06271979957818985, |
|
"learning_rate": 4.1281407911102425e-05, |
|
"loss": 0.0559, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 1.3562895291637385, |
|
"grad_norm": 0.05037263408303261, |
|
"learning_rate": 4.111864203469457e-05, |
|
"loss": 0.0263, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 1.3586319981260249, |
|
"grad_norm": 0.2569263279438019, |
|
"learning_rate": 4.095597328339452e-05, |
|
"loss": 0.0259, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.360974467088311, |
|
"grad_norm": 0.39117732644081116, |
|
"learning_rate": 4.079340343612165e-05, |
|
"loss": 0.0413, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 1.3633169360505972, |
|
"grad_norm": 0.0529431588947773, |
|
"learning_rate": 4.063093427071376e-05, |
|
"loss": 0.0615, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 1.3656594050128836, |
|
"grad_norm": 0.18688374757766724, |
|
"learning_rate": 4.046856756390767e-05, |
|
"loss": 0.0184, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 1.3680018739751698, |
|
"grad_norm": 0.08132046461105347, |
|
"learning_rate": 4.0306305091319595e-05, |
|
"loss": 0.0203, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 1.370344342937456, |
|
"grad_norm": 0.2862519323825836, |
|
"learning_rate": 4.0144148627425993e-05, |
|
"loss": 0.0497, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.3726868118997424, |
|
"grad_norm": 0.12356792390346527, |
|
"learning_rate": 3.9982099945543945e-05, |
|
"loss": 0.0202, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 1.3750292808620286, |
|
"grad_norm": 0.43368279933929443, |
|
"learning_rate": 3.982016081781189e-05, |
|
"loss": 0.0305, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 1.3773717498243148, |
|
"grad_norm": 0.03974668309092522, |
|
"learning_rate": 3.965833301517017e-05, |
|
"loss": 0.0262, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.3797142187866012, |
|
"grad_norm": 0.16461171209812164, |
|
"learning_rate": 3.949661830734172e-05, |
|
"loss": 0.0375, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 1.3820566877488873, |
|
"grad_norm": 0.06088129058480263, |
|
"learning_rate": 3.933501846281267e-05, |
|
"loss": 0.0192, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.3843991567111735, |
|
"grad_norm": 0.2690442204475403, |
|
"learning_rate": 3.917353524881302e-05, |
|
"loss": 0.0159, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 1.38674162567346, |
|
"grad_norm": 0.09126674383878708, |
|
"learning_rate": 3.901217043129735e-05, |
|
"loss": 0.0334, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 1.3890840946357461, |
|
"grad_norm": 0.11212047934532166, |
|
"learning_rate": 3.8850925774925425e-05, |
|
"loss": 0.0208, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 1.3914265635980323, |
|
"grad_norm": 0.03019798919558525, |
|
"learning_rate": 3.8689803043043e-05, |
|
"loss": 0.0218, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 1.3937690325603187, |
|
"grad_norm": 0.11158014088869095, |
|
"learning_rate": 3.852880399766243e-05, |
|
"loss": 0.0327, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 1.3961115015226049, |
|
"grad_norm": 0.1466631442308426, |
|
"learning_rate": 3.836793039944349e-05, |
|
"loss": 0.0316, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 1.398453970484891, |
|
"grad_norm": 0.04972492530941963, |
|
"learning_rate": 3.820718400767409e-05, |
|
"loss": 0.0204, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 1.4007964394471772, |
|
"grad_norm": 0.18622121214866638, |
|
"learning_rate": 3.8046566580251e-05, |
|
"loss": 0.0446, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 1.4031389084094636, |
|
"grad_norm": 0.4047488868236542, |
|
"learning_rate": 3.788607987366069e-05, |
|
"loss": 0.0422, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 1.4054813773717498, |
|
"grad_norm": 0.043907005339860916, |
|
"learning_rate": 3.772572564296005e-05, |
|
"loss": 0.0279, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.407823846334036, |
|
"grad_norm": 0.2679661214351654, |
|
"learning_rate": 3.756550564175727e-05, |
|
"loss": 0.0209, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 1.4101663152963222, |
|
"grad_norm": 0.0252488162368536, |
|
"learning_rate": 3.74054216221926e-05, |
|
"loss": 0.0187, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 1.4125087842586086, |
|
"grad_norm": 0.03220526501536369, |
|
"learning_rate": 3.7245475334919246e-05, |
|
"loss": 0.0235, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 1.4148512532208948, |
|
"grad_norm": 0.132725328207016, |
|
"learning_rate": 3.7085668529084184e-05, |
|
"loss": 0.0231, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 1.417193722183181, |
|
"grad_norm": 0.17545637488365173, |
|
"learning_rate": 3.6926002952309016e-05, |
|
"loss": 0.0259, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 1.4195361911454674, |
|
"grad_norm": 0.197429358959198, |
|
"learning_rate": 3.676648035067093e-05, |
|
"loss": 0.0274, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 1.4218786601077535, |
|
"grad_norm": 0.06819231063127518, |
|
"learning_rate": 3.6607102468683526e-05, |
|
"loss": 0.0355, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 1.4242211290700397, |
|
"grad_norm": 0.16003242135047913, |
|
"learning_rate": 3.6447871049277796e-05, |
|
"loss": 0.0376, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 1.4265635980323261, |
|
"grad_norm": 0.13673585653305054, |
|
"learning_rate": 3.628878783378302e-05, |
|
"loss": 0.0213, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 1.4289060669946123, |
|
"grad_norm": 0.15434902906417847, |
|
"learning_rate": 3.612985456190778e-05, |
|
"loss": 0.0126, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.4312485359568985, |
|
"grad_norm": 0.17395956814289093, |
|
"learning_rate": 3.597107297172084e-05, |
|
"loss": 0.084, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 1.433591004919185, |
|
"grad_norm": 0.04844974726438522, |
|
"learning_rate": 3.581244479963225e-05, |
|
"loss": 0.0219, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 1.435933473881471, |
|
"grad_norm": 0.04163607209920883, |
|
"learning_rate": 3.5653971780374295e-05, |
|
"loss": 0.0279, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 1.4382759428437573, |
|
"grad_norm": 0.11247994005680084, |
|
"learning_rate": 3.5495655646982505e-05, |
|
"loss": 0.0388, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 1.4406184118060437, |
|
"grad_norm": 0.10106071829795837, |
|
"learning_rate": 3.533749813077677e-05, |
|
"loss": 0.0197, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 1.4429608807683298, |
|
"grad_norm": 0.3352503776550293, |
|
"learning_rate": 3.517950096134232e-05, |
|
"loss": 0.0306, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 1.445303349730616, |
|
"grad_norm": 0.23961161077022552, |
|
"learning_rate": 3.5021665866510925e-05, |
|
"loss": 0.0361, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 1.4476458186929024, |
|
"grad_norm": 0.27124881744384766, |
|
"learning_rate": 3.4863994572341843e-05, |
|
"loss": 0.0215, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 1.4499882876551886, |
|
"grad_norm": 0.19891873002052307, |
|
"learning_rate": 3.470648880310313e-05, |
|
"loss": 0.0356, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 1.4523307566174748, |
|
"grad_norm": 0.2036479115486145, |
|
"learning_rate": 3.4549150281252636e-05, |
|
"loss": 0.024, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.4546732255797612, |
|
"grad_norm": 0.20012417435646057, |
|
"learning_rate": 3.439198072741921e-05, |
|
"loss": 0.0601, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 1.4570156945420474, |
|
"grad_norm": 0.09231871366500854, |
|
"learning_rate": 3.423498186038393e-05, |
|
"loss": 0.0264, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 1.4593581635043336, |
|
"grad_norm": 0.08506989479064941, |
|
"learning_rate": 3.407815539706124e-05, |
|
"loss": 0.0326, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 1.4617006324666197, |
|
"grad_norm": 0.07338278740644455, |
|
"learning_rate": 3.392150305248024e-05, |
|
"loss": 0.0261, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 1.4640431014289061, |
|
"grad_norm": 0.04994959384202957, |
|
"learning_rate": 3.3765026539765834e-05, |
|
"loss": 0.0155, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 1.4663855703911923, |
|
"grad_norm": 0.14995336532592773, |
|
"learning_rate": 3.360872757012011e-05, |
|
"loss": 0.0215, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 1.4687280393534785, |
|
"grad_norm": 0.08906183391809464, |
|
"learning_rate": 3.3452607852803584e-05, |
|
"loss": 0.0165, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 1.4710705083157647, |
|
"grad_norm": 0.07266787439584732, |
|
"learning_rate": 3.329666909511645e-05, |
|
"loss": 0.0368, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 1.473412977278051, |
|
"grad_norm": 0.5040260553359985, |
|
"learning_rate": 3.3140913002379995e-05, |
|
"loss": 0.0261, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 1.4757554462403373, |
|
"grad_norm": 0.1433238685131073, |
|
"learning_rate": 3.298534127791785e-05, |
|
"loss": 0.0175, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.4780979152026235, |
|
"grad_norm": 0.3917306363582611, |
|
"learning_rate": 3.282995562303754e-05, |
|
"loss": 0.0235, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 1.4804403841649099, |
|
"grad_norm": 0.07920818775892258, |
|
"learning_rate": 3.267475773701161e-05, |
|
"loss": 0.0428, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 1.482782853127196, |
|
"grad_norm": 0.07408647239208221, |
|
"learning_rate": 3.251974931705933e-05, |
|
"loss": 0.0255, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 1.4851253220894822, |
|
"grad_norm": 0.0957607626914978, |
|
"learning_rate": 3.236493205832795e-05, |
|
"loss": 0.0279, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 1.4874677910517686, |
|
"grad_norm": 0.372249037027359, |
|
"learning_rate": 3.221030765387417e-05, |
|
"loss": 0.0302, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 1.4898102600140548, |
|
"grad_norm": 0.20557020604610443, |
|
"learning_rate": 3.205587779464576e-05, |
|
"loss": 0.0374, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 1.492152728976341, |
|
"grad_norm": 0.2854403257369995, |
|
"learning_rate": 3.190164416946285e-05, |
|
"loss": 0.0234, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 1.4944951979386274, |
|
"grad_norm": 0.023650668561458588, |
|
"learning_rate": 3.1747608464999725e-05, |
|
"loss": 0.028, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 1.4968376669009136, |
|
"grad_norm": 0.3256511390209198, |
|
"learning_rate": 3.1593772365766105e-05, |
|
"loss": 0.0349, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 1.4991801358631998, |
|
"grad_norm": 0.10362248122692108, |
|
"learning_rate": 3.144013755408895e-05, |
|
"loss": 0.0181, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.5015226048254862, |
|
"grad_norm": 0.22891394793987274, |
|
"learning_rate": 3.128670571009399e-05, |
|
"loss": 0.0139, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 1.5038650737877723, |
|
"grad_norm": 0.3262953460216522, |
|
"learning_rate": 3.113347851168721e-05, |
|
"loss": 0.0276, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 1.5062075427500585, |
|
"grad_norm": 0.04172496870160103, |
|
"learning_rate": 3.098045763453678e-05, |
|
"loss": 0.0151, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 1.508550011712345, |
|
"grad_norm": 0.10430093109607697, |
|
"learning_rate": 3.082764475205442e-05, |
|
"loss": 0.0151, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 1.510892480674631, |
|
"grad_norm": 0.061427149921655655, |
|
"learning_rate": 3.0675041535377405e-05, |
|
"loss": 0.0257, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 1.5132349496369173, |
|
"grad_norm": 0.3583897054195404, |
|
"learning_rate": 3.052264965335e-05, |
|
"loss": 0.0228, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 1.5155774185992037, |
|
"grad_norm": 0.3000676929950714, |
|
"learning_rate": 3.0370470772505433e-05, |
|
"loss": 0.0319, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 1.5179198875614897, |
|
"grad_norm": 0.054317738860845566, |
|
"learning_rate": 3.0218506557047598e-05, |
|
"loss": 0.0258, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 1.520262356523776, |
|
"grad_norm": 0.554436981678009, |
|
"learning_rate": 3.006675866883275e-05, |
|
"loss": 0.0423, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 1.5226048254860625, |
|
"grad_norm": 0.047464508563280106, |
|
"learning_rate": 2.991522876735154e-05, |
|
"loss": 0.0181, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.5249472944483484, |
|
"grad_norm": 0.0966537818312645, |
|
"learning_rate": 2.976391850971065e-05, |
|
"loss": 0.059, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 1.5272897634106348, |
|
"grad_norm": 0.2305019199848175, |
|
"learning_rate": 2.9612829550614836e-05, |
|
"loss": 0.0525, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 1.529632232372921, |
|
"grad_norm": 0.0933275818824768, |
|
"learning_rate": 2.9461963542348737e-05, |
|
"loss": 0.0181, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 1.5319747013352072, |
|
"grad_norm": 0.06837865710258484, |
|
"learning_rate": 2.931132213475884e-05, |
|
"loss": 0.0305, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 1.5343171702974936, |
|
"grad_norm": 0.3509468138217926, |
|
"learning_rate": 2.916090697523549e-05, |
|
"loss": 0.0246, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 1.5366596392597798, |
|
"grad_norm": 0.09399297833442688, |
|
"learning_rate": 2.9010719708694722e-05, |
|
"loss": 0.0571, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 1.539002108222066, |
|
"grad_norm": 0.04773678630590439, |
|
"learning_rate": 2.8860761977560436e-05, |
|
"loss": 0.0219, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 1.5413445771843524, |
|
"grad_norm": 0.5464432835578918, |
|
"learning_rate": 2.8711035421746367e-05, |
|
"loss": 0.033, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 1.5436870461466385, |
|
"grad_norm": 0.03716734051704407, |
|
"learning_rate": 2.8561541678638142e-05, |
|
"loss": 0.0238, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 1.5460295151089247, |
|
"grad_norm": 0.07261854410171509, |
|
"learning_rate": 2.8412282383075363e-05, |
|
"loss": 0.0355, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.5483719840712111, |
|
"grad_norm": 0.3284207880496979, |
|
"learning_rate": 2.8263259167333777e-05, |
|
"loss": 0.0179, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 1.5507144530334973, |
|
"grad_norm": 0.16059207916259766, |
|
"learning_rate": 2.811447366110741e-05, |
|
"loss": 0.0221, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 1.5530569219957835, |
|
"grad_norm": 0.1849690079689026, |
|
"learning_rate": 2.7965927491490705e-05, |
|
"loss": 0.0287, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 1.55539939095807, |
|
"grad_norm": 0.39172595739364624, |
|
"learning_rate": 2.7817622282960815e-05, |
|
"loss": 0.0475, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 1.557741859920356, |
|
"grad_norm": 0.30010920763015747, |
|
"learning_rate": 2.766955965735968e-05, |
|
"loss": 0.0312, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 1.5600843288826423, |
|
"grad_norm": 0.4200305640697479, |
|
"learning_rate": 2.7521741233876496e-05, |
|
"loss": 0.0276, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 1.5624267978449287, |
|
"grad_norm": 0.06515451520681381, |
|
"learning_rate": 2.7374168629029813e-05, |
|
"loss": 0.0165, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 1.5647692668072148, |
|
"grad_norm": 0.19618399441242218, |
|
"learning_rate": 2.7226843456650037e-05, |
|
"loss": 0.019, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 1.567111735769501, |
|
"grad_norm": 0.4492703378200531, |
|
"learning_rate": 2.707976732786166e-05, |
|
"loss": 0.0244, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 1.5694542047317874, |
|
"grad_norm": 0.18303832411766052, |
|
"learning_rate": 2.693294185106562e-05, |
|
"loss": 0.0172, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.5717966736940734, |
|
"grad_norm": 0.10762883722782135, |
|
"learning_rate": 2.6786368631921836e-05, |
|
"loss": 0.0286, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 1.5741391426563598, |
|
"grad_norm": 0.1200929656624794, |
|
"learning_rate": 2.6640049273331515e-05, |
|
"loss": 0.0264, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 1.5764816116186462, |
|
"grad_norm": 0.025387238711118698, |
|
"learning_rate": 2.6493985375419778e-05, |
|
"loss": 0.0172, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 1.5788240805809322, |
|
"grad_norm": 0.2033502608537674, |
|
"learning_rate": 2.6348178535517966e-05, |
|
"loss": 0.0206, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 1.5811665495432186, |
|
"grad_norm": 0.1873401701450348, |
|
"learning_rate": 2.6202630348146324e-05, |
|
"loss": 0.0228, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 1.583509018505505, |
|
"grad_norm": 0.3058501183986664, |
|
"learning_rate": 2.6057342404996522e-05, |
|
"loss": 0.0178, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 1.585851487467791, |
|
"grad_norm": 0.07519169896841049, |
|
"learning_rate": 2.591231629491423e-05, |
|
"loss": 0.0242, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 1.5881939564300773, |
|
"grad_norm": 0.11511756479740143, |
|
"learning_rate": 2.5767553603881767e-05, |
|
"loss": 0.0146, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 1.5905364253923635, |
|
"grad_norm": 0.3080747425556183, |
|
"learning_rate": 2.562305591500069e-05, |
|
"loss": 0.0226, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 1.5928788943546497, |
|
"grad_norm": 0.04100322350859642, |
|
"learning_rate": 2.547882480847461e-05, |
|
"loss": 0.0201, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.595221363316936, |
|
"grad_norm": 0.04346079006791115, |
|
"learning_rate": 2.5334861861591753e-05, |
|
"loss": 0.0132, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 1.5975638322792223, |
|
"grad_norm": 0.37990304827690125, |
|
"learning_rate": 2.5191168648707887e-05, |
|
"loss": 0.0281, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 1.5999063012415085, |
|
"grad_norm": 0.4856362044811249, |
|
"learning_rate": 2.5047746741228978e-05, |
|
"loss": 0.0314, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 1.6022487702037949, |
|
"grad_norm": 0.10676129907369614, |
|
"learning_rate": 2.490459770759398e-05, |
|
"loss": 0.0527, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 1.604591239166081, |
|
"grad_norm": 0.05450962483882904, |
|
"learning_rate": 2.476172311325783e-05, |
|
"loss": 0.0316, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 1.6069337081283672, |
|
"grad_norm": 0.04609961807727814, |
|
"learning_rate": 2.4619124520674146e-05, |
|
"loss": 0.0272, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 1.6092761770906536, |
|
"grad_norm": 0.03132042661309242, |
|
"learning_rate": 2.447680348927837e-05, |
|
"loss": 0.0249, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 1.6116186460529398, |
|
"grad_norm": 0.11953801661729813, |
|
"learning_rate": 2.433476157547044e-05, |
|
"loss": 0.0178, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 1.613961115015226, |
|
"grad_norm": 0.1899009346961975, |
|
"learning_rate": 2.419300033259798e-05, |
|
"loss": 0.0445, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 1.6163035839775124, |
|
"grad_norm": 0.04766709730029106, |
|
"learning_rate": 2.405152131093926e-05, |
|
"loss": 0.0144, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.6186460529397986, |
|
"grad_norm": 0.40684422850608826, |
|
"learning_rate": 2.3910326057686127e-05, |
|
"loss": 0.018, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 1.6209885219020848, |
|
"grad_norm": 0.06992173194885254, |
|
"learning_rate": 2.3769416116927335e-05, |
|
"loss": 0.0183, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 1.6233309908643712, |
|
"grad_norm": 0.050338905304670334, |
|
"learning_rate": 2.362879302963135e-05, |
|
"loss": 0.0237, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 1.6256734598266573, |
|
"grad_norm": 0.19553512334823608, |
|
"learning_rate": 2.3488458333629777e-05, |
|
"loss": 0.0339, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 1.6280159287889435, |
|
"grad_norm": 0.15470145642757416, |
|
"learning_rate": 2.3348413563600325e-05, |
|
"loss": 0.0094, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 1.63035839775123, |
|
"grad_norm": 0.23403486609458923, |
|
"learning_rate": 2.3208660251050158e-05, |
|
"loss": 0.026, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 1.6327008667135159, |
|
"grad_norm": 0.13263070583343506, |
|
"learning_rate": 2.3069199924299174e-05, |
|
"loss": 0.0197, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 1.6350433356758023, |
|
"grad_norm": 0.4499634802341461, |
|
"learning_rate": 2.29300341084631e-05, |
|
"loss": 0.0183, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 1.6373858046380887, |
|
"grad_norm": 0.020672103390097618, |
|
"learning_rate": 2.279116432543705e-05, |
|
"loss": 0.019, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 1.6397282736003747, |
|
"grad_norm": 0.08733947575092316, |
|
"learning_rate": 2.2652592093878666e-05, |
|
"loss": 0.0317, |
|
"step": 7000 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 10000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.841140693728e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|