|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.6020469596628537, |
|
"eval_steps": 500, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0012040939193257074, |
|
"grad_norm": 2.0694425106048584, |
|
"learning_rate": 1.2033694344163658e-08, |
|
"loss": 0.6897, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.002408187838651415, |
|
"grad_norm": 2.151496171951294, |
|
"learning_rate": 2.4067388688327316e-08, |
|
"loss": 0.6787, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.003612281757977122, |
|
"grad_norm": 2.640268564224243, |
|
"learning_rate": 3.610108303249097e-08, |
|
"loss": 0.6639, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.00481637567730283, |
|
"grad_norm": 2.6572210788726807, |
|
"learning_rate": 4.813477737665463e-08, |
|
"loss": 0.7152, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.006020469596628537, |
|
"grad_norm": 1.7933714389801025, |
|
"learning_rate": 6.016847172081829e-08, |
|
"loss": 0.6503, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.007224563515954244, |
|
"grad_norm": 2.3688879013061523, |
|
"learning_rate": 7.220216606498194e-08, |
|
"loss": 0.6827, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.008428657435279952, |
|
"grad_norm": 2.220139265060425, |
|
"learning_rate": 8.42358604091456e-08, |
|
"loss": 0.6443, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.00963275135460566, |
|
"grad_norm": 2.4725093841552734, |
|
"learning_rate": 9.626955475330927e-08, |
|
"loss": 0.6681, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.010836845273931367, |
|
"grad_norm": 1.4149224758148193, |
|
"learning_rate": 1.0830324909747292e-07, |
|
"loss": 0.5592, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.012040939193257074, |
|
"grad_norm": 0.9355699419975281, |
|
"learning_rate": 1.2033694344163658e-07, |
|
"loss": 0.5802, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.013245033112582781, |
|
"grad_norm": 1.0211461782455444, |
|
"learning_rate": 1.3237063778580024e-07, |
|
"loss": 0.5589, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.014449127031908489, |
|
"grad_norm": 1.0006492137908936, |
|
"learning_rate": 1.4440433212996388e-07, |
|
"loss": 0.5421, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.015653220951234198, |
|
"grad_norm": 0.8444674015045166, |
|
"learning_rate": 1.5643802647412754e-07, |
|
"loss": 0.5079, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.016857314870559904, |
|
"grad_norm": 0.7920398712158203, |
|
"learning_rate": 1.684717208182912e-07, |
|
"loss": 0.4898, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.018061408789885613, |
|
"grad_norm": 0.6817948818206787, |
|
"learning_rate": 1.8050541516245487e-07, |
|
"loss": 0.4645, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.01926550270921132, |
|
"grad_norm": 0.9353106021881104, |
|
"learning_rate": 1.9253910950661853e-07, |
|
"loss": 0.485, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.020469596628537028, |
|
"grad_norm": 0.6695616841316223, |
|
"learning_rate": 2.045728038507822e-07, |
|
"loss": 0.4647, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.021673690547862733, |
|
"grad_norm": 0.6993837952613831, |
|
"learning_rate": 2.1660649819494583e-07, |
|
"loss": 0.4378, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.022877784467188442, |
|
"grad_norm": 0.7333642244338989, |
|
"learning_rate": 2.286401925391095e-07, |
|
"loss": 0.4288, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.024081878386514148, |
|
"grad_norm": 0.707914412021637, |
|
"learning_rate": 2.4067388688327316e-07, |
|
"loss": 0.4601, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.025285972305839857, |
|
"grad_norm": 0.7626605033874512, |
|
"learning_rate": 2.527075812274368e-07, |
|
"loss": 0.4454, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.026490066225165563, |
|
"grad_norm": 1.2267224788665771, |
|
"learning_rate": 2.647412755716005e-07, |
|
"loss": 0.4398, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.027694160144491272, |
|
"grad_norm": 0.7376552224159241, |
|
"learning_rate": 2.767749699157641e-07, |
|
"loss": 0.4275, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.028898254063816978, |
|
"grad_norm": 0.7109339237213135, |
|
"learning_rate": 2.8880866425992776e-07, |
|
"loss": 0.3996, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.030102347983142687, |
|
"grad_norm": 0.6406791806221008, |
|
"learning_rate": 3.008423586040915e-07, |
|
"loss": 0.4337, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.031306441902468396, |
|
"grad_norm": 0.6780328154563904, |
|
"learning_rate": 3.128760529482551e-07, |
|
"loss": 0.4296, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.0325105358217941, |
|
"grad_norm": 0.5574681162834167, |
|
"learning_rate": 3.2490974729241875e-07, |
|
"loss": 0.4123, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.03371462974111981, |
|
"grad_norm": 0.6190093755722046, |
|
"learning_rate": 3.369434416365824e-07, |
|
"loss": 0.3959, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.034918723660445516, |
|
"grad_norm": 0.6488677859306335, |
|
"learning_rate": 3.4897713598074607e-07, |
|
"loss": 0.3883, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.036122817579771226, |
|
"grad_norm": 0.6014848351478577, |
|
"learning_rate": 3.6101083032490974e-07, |
|
"loss": 0.4222, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.03732691149909693, |
|
"grad_norm": 0.5347362160682678, |
|
"learning_rate": 3.730445246690734e-07, |
|
"loss": 0.3929, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.03853100541842264, |
|
"grad_norm": 1.4445090293884277, |
|
"learning_rate": 3.8507821901323706e-07, |
|
"loss": 0.3798, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.039735099337748346, |
|
"grad_norm": 0.6319730877876282, |
|
"learning_rate": 3.9711191335740067e-07, |
|
"loss": 0.386, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.040939193257074055, |
|
"grad_norm": 0.9257851243019104, |
|
"learning_rate": 4.091456077015644e-07, |
|
"loss": 0.393, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.04214328717639976, |
|
"grad_norm": 0.5936801433563232, |
|
"learning_rate": 4.2117930204572805e-07, |
|
"loss": 0.3912, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.04334738109572547, |
|
"grad_norm": 0.686888575553894, |
|
"learning_rate": 4.3321299638989166e-07, |
|
"loss": 0.4015, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.044551475015051176, |
|
"grad_norm": 0.5986278653144836, |
|
"learning_rate": 4.452466907340554e-07, |
|
"loss": 0.3622, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.045755568934376885, |
|
"grad_norm": 0.5603286623954773, |
|
"learning_rate": 4.57280385078219e-07, |
|
"loss": 0.3774, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.04695966285370259, |
|
"grad_norm": 1.2507776021957397, |
|
"learning_rate": 4.6931407942238265e-07, |
|
"loss": 0.3681, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.048163756773028296, |
|
"grad_norm": 0.5886845588684082, |
|
"learning_rate": 4.813477737665463e-07, |
|
"loss": 0.371, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.049367850692354005, |
|
"grad_norm": 0.5690301656723022, |
|
"learning_rate": 4.9338146811071e-07, |
|
"loss": 0.3454, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.050571944611679714, |
|
"grad_norm": 0.6363804340362549, |
|
"learning_rate": 5.054151624548736e-07, |
|
"loss": 0.3477, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.05177603853100542, |
|
"grad_norm": 0.49289166927337646, |
|
"learning_rate": 5.174488567990373e-07, |
|
"loss": 0.352, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.052980132450331126, |
|
"grad_norm": 0.5901724696159363, |
|
"learning_rate": 5.29482551143201e-07, |
|
"loss": 0.3514, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.054184226369656835, |
|
"grad_norm": 0.6019484996795654, |
|
"learning_rate": 5.415162454873646e-07, |
|
"loss": 0.3713, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.055388320288982544, |
|
"grad_norm": 0.5057175755500793, |
|
"learning_rate": 5.535499398315282e-07, |
|
"loss": 0.3346, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.056592414208308246, |
|
"grad_norm": 0.4834252893924713, |
|
"learning_rate": 5.655836341756919e-07, |
|
"loss": 0.3638, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.057796508127633955, |
|
"grad_norm": 0.6098750233650208, |
|
"learning_rate": 5.776173285198555e-07, |
|
"loss": 0.3622, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.059000602046959665, |
|
"grad_norm": 0.6201721429824829, |
|
"learning_rate": 5.896510228640193e-07, |
|
"loss": 0.3329, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.060204695966285374, |
|
"grad_norm": 0.7006021738052368, |
|
"learning_rate": 6.01684717208183e-07, |
|
"loss": 0.3487, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.061408789885611076, |
|
"grad_norm": 0.708990216255188, |
|
"learning_rate": 6.137184115523465e-07, |
|
"loss": 0.3448, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.06261288380493679, |
|
"grad_norm": 0.7767229676246643, |
|
"learning_rate": 6.257521058965102e-07, |
|
"loss": 0.3751, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.0638169777242625, |
|
"grad_norm": 0.6051218509674072, |
|
"learning_rate": 6.377858002406738e-07, |
|
"loss": 0.3502, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.0650210716435882, |
|
"grad_norm": 0.7111226916313171, |
|
"learning_rate": 6.498194945848375e-07, |
|
"loss": 0.3625, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.06622516556291391, |
|
"grad_norm": 0.7441733479499817, |
|
"learning_rate": 6.618531889290013e-07, |
|
"loss": 0.3269, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.06742925948223961, |
|
"grad_norm": 0.6909326910972595, |
|
"learning_rate": 6.738868832731648e-07, |
|
"loss": 0.3302, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.06863335340156532, |
|
"grad_norm": 0.7504749298095703, |
|
"learning_rate": 6.859205776173285e-07, |
|
"loss": 0.3425, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.06983744732089103, |
|
"grad_norm": 0.5878099799156189, |
|
"learning_rate": 6.979542719614921e-07, |
|
"loss": 0.3504, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.07104154124021674, |
|
"grad_norm": 0.5515761971473694, |
|
"learning_rate": 7.099879663056558e-07, |
|
"loss": 0.3409, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.07224563515954245, |
|
"grad_norm": 0.57797771692276, |
|
"learning_rate": 7.220216606498195e-07, |
|
"loss": 0.3416, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.07344972907886815, |
|
"grad_norm": 0.4524708390235901, |
|
"learning_rate": 7.34055354993983e-07, |
|
"loss": 0.3581, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.07465382299819386, |
|
"grad_norm": 0.718927800655365, |
|
"learning_rate": 7.460890493381468e-07, |
|
"loss": 0.3609, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.07585791691751957, |
|
"grad_norm": 0.5666077733039856, |
|
"learning_rate": 7.581227436823105e-07, |
|
"loss": 0.335, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.07706201083684527, |
|
"grad_norm": 0.5896601676940918, |
|
"learning_rate": 7.701564380264741e-07, |
|
"loss": 0.3274, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.07826610475617098, |
|
"grad_norm": 0.6044319868087769, |
|
"learning_rate": 7.821901323706378e-07, |
|
"loss": 0.3407, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.07947019867549669, |
|
"grad_norm": 0.6831541061401367, |
|
"learning_rate": 7.942238267148013e-07, |
|
"loss": 0.3333, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.0806742925948224, |
|
"grad_norm": 0.7124572396278381, |
|
"learning_rate": 8.06257521058965e-07, |
|
"loss": 0.3326, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.08187838651414811, |
|
"grad_norm": 0.732711136341095, |
|
"learning_rate": 8.182912154031288e-07, |
|
"loss": 0.3487, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.08308248043347381, |
|
"grad_norm": 0.7555579543113708, |
|
"learning_rate": 8.303249097472924e-07, |
|
"loss": 0.3218, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.08428657435279951, |
|
"grad_norm": 0.7618419528007507, |
|
"learning_rate": 8.423586040914561e-07, |
|
"loss": 0.3231, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.08549066827212523, |
|
"grad_norm": 0.7383216023445129, |
|
"learning_rate": 8.543922984356197e-07, |
|
"loss": 0.3218, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.08669476219145093, |
|
"grad_norm": 0.5902182459831238, |
|
"learning_rate": 8.664259927797833e-07, |
|
"loss": 0.3367, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.08789885611077664, |
|
"grad_norm": 0.6107906103134155, |
|
"learning_rate": 8.78459687123947e-07, |
|
"loss": 0.3331, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.08910295003010235, |
|
"grad_norm": 0.7179387211799622, |
|
"learning_rate": 8.904933814681108e-07, |
|
"loss": 0.3347, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.09030704394942805, |
|
"grad_norm": 0.8263080716133118, |
|
"learning_rate": 9.025270758122743e-07, |
|
"loss": 0.3247, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.09151113786875377, |
|
"grad_norm": 0.8549688458442688, |
|
"learning_rate": 9.14560770156438e-07, |
|
"loss": 0.3239, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.09271523178807947, |
|
"grad_norm": 0.6674267053604126, |
|
"learning_rate": 9.265944645006016e-07, |
|
"loss": 0.333, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.09391932570740517, |
|
"grad_norm": 0.5892189741134644, |
|
"learning_rate": 9.386281588447653e-07, |
|
"loss": 0.322, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.09512341962673089, |
|
"grad_norm": 0.7087513208389282, |
|
"learning_rate": 9.50661853188929e-07, |
|
"loss": 0.327, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.09632751354605659, |
|
"grad_norm": 0.6016402840614319, |
|
"learning_rate": 9.626955475330926e-07, |
|
"loss": 0.3255, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.0975316074653823, |
|
"grad_norm": 0.5783524513244629, |
|
"learning_rate": 9.747292418772562e-07, |
|
"loss": 0.3128, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.09873570138470801, |
|
"grad_norm": 0.6049711108207703, |
|
"learning_rate": 9.8676293622142e-07, |
|
"loss": 0.3257, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.09993979530403371, |
|
"grad_norm": 0.6259274482727051, |
|
"learning_rate": 9.987966305655835e-07, |
|
"loss": 0.3318, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.10114388922335943, |
|
"grad_norm": 0.5331777930259705, |
|
"learning_rate": 9.999964221834556e-07, |
|
"loss": 0.3133, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.10234798314268513, |
|
"grad_norm": 0.5190764665603638, |
|
"learning_rate": 9.999840544882987e-07, |
|
"loss": 0.3349, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.10355207706201083, |
|
"grad_norm": 0.5867928862571716, |
|
"learning_rate": 9.99962852962418e-07, |
|
"loss": 0.3252, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.10475617098133655, |
|
"grad_norm": 0.7667666673660278, |
|
"learning_rate": 9.999328179804064e-07, |
|
"loss": 0.3269, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.10596026490066225, |
|
"grad_norm": 0.5684708952903748, |
|
"learning_rate": 9.998939500729291e-07, |
|
"loss": 0.3204, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.10716435881998795, |
|
"grad_norm": 0.5369793772697449, |
|
"learning_rate": 9.99846249926713e-07, |
|
"loss": 0.2997, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.10836845273931367, |
|
"grad_norm": 0.5773791074752808, |
|
"learning_rate": 9.997897183845347e-07, |
|
"loss": 0.3147, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.10957254665863937, |
|
"grad_norm": 0.571826159954071, |
|
"learning_rate": 9.997243564452064e-07, |
|
"loss": 0.32, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.11077664057796509, |
|
"grad_norm": 0.420244961977005, |
|
"learning_rate": 9.996501652635578e-07, |
|
"loss": 0.3141, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.11198073449729079, |
|
"grad_norm": 0.5253920555114746, |
|
"learning_rate": 9.99567146150415e-07, |
|
"loss": 0.3201, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.11318482841661649, |
|
"grad_norm": 0.49279969930648804, |
|
"learning_rate": 9.994753005725785e-07, |
|
"loss": 0.3076, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.11438892233594221, |
|
"grad_norm": 0.6114805936813354, |
|
"learning_rate": 9.993746301527965e-07, |
|
"loss": 0.3209, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.11559301625526791, |
|
"grad_norm": 1.6514418125152588, |
|
"learning_rate": 9.99265136669737e-07, |
|
"loss": 0.319, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.11679711017459361, |
|
"grad_norm": 0.6415925621986389, |
|
"learning_rate": 9.99146822057955e-07, |
|
"loss": 0.3268, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.11800120409391933, |
|
"grad_norm": 0.5680079460144043, |
|
"learning_rate": 9.990196884078599e-07, |
|
"loss": 0.3139, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.11920529801324503, |
|
"grad_norm": 0.715497612953186, |
|
"learning_rate": 9.988837379656778e-07, |
|
"loss": 0.3143, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.12040939193257075, |
|
"grad_norm": 0.6379466652870178, |
|
"learning_rate": 9.987389731334112e-07, |
|
"loss": 0.3037, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.12161348585189645, |
|
"grad_norm": 0.5227240920066833, |
|
"learning_rate": 9.985853964687985e-07, |
|
"loss": 0.3202, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.12281757977122215, |
|
"grad_norm": 0.5148226022720337, |
|
"learning_rate": 9.984230106852658e-07, |
|
"loss": 0.3089, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.12402167369054787, |
|
"grad_norm": 0.8337252140045166, |
|
"learning_rate": 9.982518186518824e-07, |
|
"loss": 0.3093, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.12522576760987358, |
|
"grad_norm": 0.5874176621437073, |
|
"learning_rate": 9.980718233933072e-07, |
|
"loss": 0.3257, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.12642986152919927, |
|
"grad_norm": 0.6203235983848572, |
|
"learning_rate": 9.978830280897373e-07, |
|
"loss": 0.3094, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.127633955448525, |
|
"grad_norm": 0.7386701107025146, |
|
"learning_rate": 9.976854360768501e-07, |
|
"loss": 0.3283, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.1288380493678507, |
|
"grad_norm": 0.7480394244194031, |
|
"learning_rate": 9.97479050845746e-07, |
|
"loss": 0.322, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.1300421432871764, |
|
"grad_norm": 0.6779530048370361, |
|
"learning_rate": 9.97263876042886e-07, |
|
"loss": 0.3263, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.1312462372065021, |
|
"grad_norm": 1.0457607507705688, |
|
"learning_rate": 9.970399154700262e-07, |
|
"loss": 0.324, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.13245033112582782, |
|
"grad_norm": 0.4574492871761322, |
|
"learning_rate": 9.96807173084153e-07, |
|
"loss": 0.3033, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.1336544250451535, |
|
"grad_norm": 0.4800940454006195, |
|
"learning_rate": 9.965656529974108e-07, |
|
"loss": 0.3076, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.13485851896447923, |
|
"grad_norm": 0.5336936116218567, |
|
"learning_rate": 9.96315359477031e-07, |
|
"loss": 0.3029, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.13606261288380495, |
|
"grad_norm": 0.9403670430183411, |
|
"learning_rate": 9.960562969452559e-07, |
|
"loss": 0.3019, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.13726670680313063, |
|
"grad_norm": 0.6152085661888123, |
|
"learning_rate": 9.957884699792604e-07, |
|
"loss": 0.3051, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.13847080072245635, |
|
"grad_norm": 0.7313536405563354, |
|
"learning_rate": 9.955118833110716e-07, |
|
"loss": 0.3137, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.13967489464178207, |
|
"grad_norm": 0.47397103905677795, |
|
"learning_rate": 9.95226541827485e-07, |
|
"loss": 0.3214, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.14087898856110775, |
|
"grad_norm": 0.4812333881855011, |
|
"learning_rate": 9.949324505699782e-07, |
|
"loss": 0.3164, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.14208308248043347, |
|
"grad_norm": 0.6729305386543274, |
|
"learning_rate": 9.946296147346215e-07, |
|
"loss": 0.2946, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.1432871763997592, |
|
"grad_norm": 0.6568790078163147, |
|
"learning_rate": 9.943180396719867e-07, |
|
"loss": 0.2929, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.1444912703190849, |
|
"grad_norm": 0.5633556842803955, |
|
"learning_rate": 9.939977308870518e-07, |
|
"loss": 0.3073, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.1456953642384106, |
|
"grad_norm": 1.1128957271575928, |
|
"learning_rate": 9.936686940391048e-07, |
|
"loss": 0.3264, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.1468994581577363, |
|
"grad_norm": 0.5192599892616272, |
|
"learning_rate": 9.933309349416428e-07, |
|
"loss": 0.3064, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.14810355207706202, |
|
"grad_norm": 0.49194392561912537, |
|
"learning_rate": 9.92984459562269e-07, |
|
"loss": 0.302, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.1493076459963877, |
|
"grad_norm": 0.5606468915939331, |
|
"learning_rate": 9.926292740225888e-07, |
|
"loss": 0.3037, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.15051173991571343, |
|
"grad_norm": 0.544266939163208, |
|
"learning_rate": 9.922653845981e-07, |
|
"loss": 0.3025, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.15171583383503914, |
|
"grad_norm": 1.0137197971343994, |
|
"learning_rate": 9.918927977180826e-07, |
|
"loss": 0.2998, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.15291992775436483, |
|
"grad_norm": 0.4881134629249573, |
|
"learning_rate": 9.91511519965486e-07, |
|
"loss": 0.2975, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.15412402167369055, |
|
"grad_norm": 0.4854426383972168, |
|
"learning_rate": 9.911215580768106e-07, |
|
"loss": 0.3109, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.15532811559301626, |
|
"grad_norm": 0.5056730508804321, |
|
"learning_rate": 9.90722918941991e-07, |
|
"loss": 0.3121, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.15653220951234195, |
|
"grad_norm": 0.5286668539047241, |
|
"learning_rate": 9.903156096042734e-07, |
|
"loss": 0.2982, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.15773630343166767, |
|
"grad_norm": 0.5490984916687012, |
|
"learning_rate": 9.898996372600903e-07, |
|
"loss": 0.3115, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.15894039735099338, |
|
"grad_norm": 0.614521861076355, |
|
"learning_rate": 9.894750092589349e-07, |
|
"loss": 0.2985, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.16014449127031907, |
|
"grad_norm": 0.5678403973579407, |
|
"learning_rate": 9.8904173310323e-07, |
|
"loss": 0.3046, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.1613485851896448, |
|
"grad_norm": 0.5179656147956848, |
|
"learning_rate": 9.885998164481966e-07, |
|
"loss": 0.3053, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.1625526791089705, |
|
"grad_norm": 0.526849091053009, |
|
"learning_rate": 9.881492671017172e-07, |
|
"loss": 0.3143, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.16375677302829622, |
|
"grad_norm": 0.5683344006538391, |
|
"learning_rate": 9.876900930241991e-07, |
|
"loss": 0.3031, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.1649608669476219, |
|
"grad_norm": 0.5243839621543884, |
|
"learning_rate": 9.872223023284333e-07, |
|
"loss": 0.312, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.16616496086694763, |
|
"grad_norm": 0.5260365605354309, |
|
"learning_rate": 9.867459032794508e-07, |
|
"loss": 0.3037, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.16736905478627334, |
|
"grad_norm": 0.4755154252052307, |
|
"learning_rate": 9.86260904294377e-07, |
|
"loss": 0.2916, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.16857314870559903, |
|
"grad_norm": 0.5555715560913086, |
|
"learning_rate": 9.857673139422833e-07, |
|
"loss": 0.3135, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.16977724262492475, |
|
"grad_norm": 0.5810279250144958, |
|
"learning_rate": 9.85265140944035e-07, |
|
"loss": 0.3104, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.17098133654425046, |
|
"grad_norm": 0.48022618889808655, |
|
"learning_rate": 9.847543941721379e-07, |
|
"loss": 0.3022, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.17218543046357615, |
|
"grad_norm": 0.5191965103149414, |
|
"learning_rate": 9.842350826505802e-07, |
|
"loss": 0.3018, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.17338952438290187, |
|
"grad_norm": 1.2972302436828613, |
|
"learning_rate": 9.837072155546753e-07, |
|
"loss": 0.3026, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.17459361830222758, |
|
"grad_norm": 0.47315987944602966, |
|
"learning_rate": 9.831708022108972e-07, |
|
"loss": 0.311, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.17579771222155327, |
|
"grad_norm": 0.5953189134597778, |
|
"learning_rate": 9.826258520967177e-07, |
|
"loss": 0.3071, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.177001806140879, |
|
"grad_norm": 0.5407562851905823, |
|
"learning_rate": 9.820723748404382e-07, |
|
"loss": 0.31, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.1782059000602047, |
|
"grad_norm": 0.5249618291854858, |
|
"learning_rate": 9.815103802210193e-07, |
|
"loss": 0.2898, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.1794099939795304, |
|
"grad_norm": 0.5347439646720886, |
|
"learning_rate": 9.80939878167908e-07, |
|
"loss": 0.2944, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.1806140878988561, |
|
"grad_norm": 0.49509304761886597, |
|
"learning_rate": 9.80360878760863e-07, |
|
"loss": 0.3073, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.18181818181818182, |
|
"grad_norm": 0.5182557106018066, |
|
"learning_rate": 9.79773392229776e-07, |
|
"loss": 0.3092, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.18302227573750754, |
|
"grad_norm": 0.5343918204307556, |
|
"learning_rate": 9.79177428954492e-07, |
|
"loss": 0.3058, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.18422636965683323, |
|
"grad_norm": 0.42448320984840393, |
|
"learning_rate": 9.785729994646228e-07, |
|
"loss": 0.2966, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.18543046357615894, |
|
"grad_norm": 0.514305055141449, |
|
"learning_rate": 9.779601144393655e-07, |
|
"loss": 0.3063, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.18663455749548466, |
|
"grad_norm": 0.559808075428009, |
|
"learning_rate": 9.773387847073102e-07, |
|
"loss": 0.3103, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.18783865141481035, |
|
"grad_norm": 0.5099034905433655, |
|
"learning_rate": 9.767090212462506e-07, |
|
"loss": 0.3045, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.18904274533413606, |
|
"grad_norm": 0.5309582352638245, |
|
"learning_rate": 9.76070835182989e-07, |
|
"loss": 0.3198, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.19024683925346178, |
|
"grad_norm": 0.5174340605735779, |
|
"learning_rate": 9.754242377931402e-07, |
|
"loss": 0.3019, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.19145093317278747, |
|
"grad_norm": 0.47818174958229065, |
|
"learning_rate": 9.747692405009327e-07, |
|
"loss": 0.2885, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.19265502709211318, |
|
"grad_norm": 0.4435511529445648, |
|
"learning_rate": 9.741058548790055e-07, |
|
"loss": 0.2716, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.1938591210114389, |
|
"grad_norm": 0.47226864099502563, |
|
"learning_rate": 9.734340926482052e-07, |
|
"loss": 0.2911, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.1950632149307646, |
|
"grad_norm": 0.4990203082561493, |
|
"learning_rate": 9.72753965677378e-07, |
|
"loss": 0.3119, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.1962673088500903, |
|
"grad_norm": 0.6255252957344055, |
|
"learning_rate": 9.7206548598316e-07, |
|
"loss": 0.2902, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.19747140276941602, |
|
"grad_norm": 0.5827116370201111, |
|
"learning_rate": 9.713686657297655e-07, |
|
"loss": 0.3079, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.1986754966887417, |
|
"grad_norm": 0.5475650429725647, |
|
"learning_rate": 9.706635172287715e-07, |
|
"loss": 0.3095, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.19987959060806743, |
|
"grad_norm": 0.674460768699646, |
|
"learning_rate": 9.699500529389001e-07, |
|
"loss": 0.2953, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.20108368452739314, |
|
"grad_norm": 0.5000407695770264, |
|
"learning_rate": 9.692282854657989e-07, |
|
"loss": 0.3055, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.20228777844671886, |
|
"grad_norm": 0.5063086748123169, |
|
"learning_rate": 9.684982275618178e-07, |
|
"loss": 0.2952, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.20349187236604455, |
|
"grad_norm": 0.6266674399375916, |
|
"learning_rate": 9.677598921257842e-07, |
|
"loss": 0.3028, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.20469596628537026, |
|
"grad_norm": 1.3428351879119873, |
|
"learning_rate": 9.67013292202775e-07, |
|
"loss": 0.3165, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.20590006020469598, |
|
"grad_norm": 0.6307231187820435, |
|
"learning_rate": 9.66258440983885e-07, |
|
"loss": 0.3112, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.20710415412402167, |
|
"grad_norm": 0.5176913738250732, |
|
"learning_rate": 9.654953518059953e-07, |
|
"loss": 0.3042, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.20830824804334738, |
|
"grad_norm": 0.4618211090564728, |
|
"learning_rate": 9.647240381515376e-07, |
|
"loss": 0.3107, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.2095123419626731, |
|
"grad_norm": 0.4354129135608673, |
|
"learning_rate": 9.639445136482546e-07, |
|
"loss": 0.2932, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.2107164358819988, |
|
"grad_norm": 0.6150096654891968, |
|
"learning_rate": 9.631567920689607e-07, |
|
"loss": 0.2898, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.2119205298013245, |
|
"grad_norm": 0.4629852771759033, |
|
"learning_rate": 9.623608873312979e-07, |
|
"loss": 0.2969, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.21312462372065022, |
|
"grad_norm": 0.4912186563014984, |
|
"learning_rate": 9.615568134974902e-07, |
|
"loss": 0.3037, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.2143287176399759, |
|
"grad_norm": 0.5452593564987183, |
|
"learning_rate": 9.607445847740946e-07, |
|
"loss": 0.3011, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.21553281155930162, |
|
"grad_norm": 0.5524305701255798, |
|
"learning_rate": 9.599242155117514e-07, |
|
"loss": 0.3056, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.21673690547862734, |
|
"grad_norm": 0.4734737277030945, |
|
"learning_rate": 9.590957202049288e-07, |
|
"loss": 0.2937, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.21794099939795303, |
|
"grad_norm": 0.5050627589225769, |
|
"learning_rate": 9.582591134916683e-07, |
|
"loss": 0.2964, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.21914509331727874, |
|
"grad_norm": 0.5784972310066223, |
|
"learning_rate": 9.574144101533258e-07, |
|
"loss": 0.3126, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.22034918723660446, |
|
"grad_norm": 0.67679762840271, |
|
"learning_rate": 9.565616251143093e-07, |
|
"loss": 0.2997, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.22155328115593018, |
|
"grad_norm": 0.730844259262085, |
|
"learning_rate": 9.55700773441817e-07, |
|
"loss": 0.2992, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.22275737507525586, |
|
"grad_norm": 0.511701226234436, |
|
"learning_rate": 9.5483187034557e-07, |
|
"loss": 0.2843, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.22396146899458158, |
|
"grad_norm": 0.49653661251068115, |
|
"learning_rate": 9.539549311775434e-07, |
|
"loss": 0.3003, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.2251655629139073, |
|
"grad_norm": 0.479397714138031, |
|
"learning_rate": 9.530699714316955e-07, |
|
"loss": 0.3007, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.22636965683323299, |
|
"grad_norm": 0.5917854905128479, |
|
"learning_rate": 9.521770067436944e-07, |
|
"loss": 0.2818, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.2275737507525587, |
|
"grad_norm": 0.4750485420227051, |
|
"learning_rate": 9.512760528906409e-07, |
|
"loss": 0.3107, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.22877784467188442, |
|
"grad_norm": 0.5081465244293213, |
|
"learning_rate": 9.503671257907905e-07, |
|
"loss": 0.3003, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.2299819385912101, |
|
"grad_norm": 0.7816819548606873, |
|
"learning_rate": 9.494502415032714e-07, |
|
"loss": 0.2898, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.23118603251053582, |
|
"grad_norm": 0.600690484046936, |
|
"learning_rate": 9.485254162278013e-07, |
|
"loss": 0.2975, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.23239012642986154, |
|
"grad_norm": 0.6016291379928589, |
|
"learning_rate": 9.475926663044016e-07, |
|
"loss": 0.2895, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.23359422034918723, |
|
"grad_norm": 0.5959491729736328, |
|
"learning_rate": 9.466520082131074e-07, |
|
"loss": 0.293, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.23479831426851294, |
|
"grad_norm": 0.5337576270103455, |
|
"learning_rate": 9.457034585736776e-07, |
|
"loss": 0.2954, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.23600240818783866, |
|
"grad_norm": 0.5701966881752014, |
|
"learning_rate": 9.447470341453003e-07, |
|
"loss": 0.3016, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.23720650210716435, |
|
"grad_norm": 0.48122677206993103, |
|
"learning_rate": 9.437827518262976e-07, |
|
"loss": 0.2834, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.23841059602649006, |
|
"grad_norm": 0.6107509732246399, |
|
"learning_rate": 9.428106286538263e-07, |
|
"loss": 0.2865, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.23961468994581578, |
|
"grad_norm": 0.4537561237812042, |
|
"learning_rate": 9.418306818035773e-07, |
|
"loss": 0.2981, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.2408187838651415, |
|
"grad_norm": 0.6205712556838989, |
|
"learning_rate": 9.408429285894721e-07, |
|
"loss": 0.3099, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.24202287778446718, |
|
"grad_norm": 0.4940670132637024, |
|
"learning_rate": 9.398473864633564e-07, |
|
"loss": 0.2942, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.2432269717037929, |
|
"grad_norm": 0.45464888215065, |
|
"learning_rate": 9.388440730146923e-07, |
|
"loss": 0.2875, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.24443106562311862, |
|
"grad_norm": 0.4339371919631958, |
|
"learning_rate": 9.378330059702479e-07, |
|
"loss": 0.284, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.2456351595424443, |
|
"grad_norm": 0.6798887848854065, |
|
"learning_rate": 9.368142031937826e-07, |
|
"loss": 0.3079, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.24683925346177002, |
|
"grad_norm": 0.504805326461792, |
|
"learning_rate": 9.357876826857334e-07, |
|
"loss": 0.2942, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.24804334738109574, |
|
"grad_norm": 1.0256134271621704, |
|
"learning_rate": 9.347534625828955e-07, |
|
"loss": 0.2958, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.24924744130042142, |
|
"grad_norm": 0.7034043073654175, |
|
"learning_rate": 9.337115611581019e-07, |
|
"loss": 0.2977, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.25045153521974717, |
|
"grad_norm": 0.6767880916595459, |
|
"learning_rate": 9.326619968199016e-07, |
|
"loss": 0.2843, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.25165562913907286, |
|
"grad_norm": 0.5257042050361633, |
|
"learning_rate": 9.316047881122334e-07, |
|
"loss": 0.2869, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.25285972305839854, |
|
"grad_norm": 0.5919986963272095, |
|
"learning_rate": 9.305399537140983e-07, |
|
"loss": 0.3009, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.2540638169777243, |
|
"grad_norm": 0.5936114192008972, |
|
"learning_rate": 9.294675124392302e-07, |
|
"loss": 0.2863, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.25526791089705, |
|
"grad_norm": 1.1754176616668701, |
|
"learning_rate": 9.283874832357625e-07, |
|
"loss": 0.2808, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.25647200481637566, |
|
"grad_norm": 0.6144666075706482, |
|
"learning_rate": 9.272998851858943e-07, |
|
"loss": 0.2854, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.2576760987357014, |
|
"grad_norm": 0.47984328866004944, |
|
"learning_rate": 9.262047375055524e-07, |
|
"loss": 0.2978, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.2588801926550271, |
|
"grad_norm": 0.6158226728439331, |
|
"learning_rate": 9.251020595440524e-07, |
|
"loss": 0.3072, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.2600842865743528, |
|
"grad_norm": 0.6357386708259583, |
|
"learning_rate": 9.239918707837564e-07, |
|
"loss": 0.2927, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.26128838049367853, |
|
"grad_norm": 0.6893799901008606, |
|
"learning_rate": 9.228741908397293e-07, |
|
"loss": 0.2988, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.2624924744130042, |
|
"grad_norm": 0.5763195157051086, |
|
"learning_rate": 9.217490394593914e-07, |
|
"loss": 0.3049, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.2636965683323299, |
|
"grad_norm": 0.5649781823158264, |
|
"learning_rate": 9.206164365221706e-07, |
|
"loss": 0.3083, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.26490066225165565, |
|
"grad_norm": 0.4519605040550232, |
|
"learning_rate": 9.194764020391506e-07, |
|
"loss": 0.274, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.26610475617098134, |
|
"grad_norm": 0.5203403830528259, |
|
"learning_rate": 9.183289561527164e-07, |
|
"loss": 0.2823, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.267308850090307, |
|
"grad_norm": 0.525934100151062, |
|
"learning_rate": 9.171741191362005e-07, |
|
"loss": 0.2928, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.26851294400963277, |
|
"grad_norm": 0.5151864290237427, |
|
"learning_rate": 9.160119113935227e-07, |
|
"loss": 0.2914, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.26971703792895846, |
|
"grad_norm": 0.663339376449585, |
|
"learning_rate": 9.14842353458831e-07, |
|
"loss": 0.301, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.27092113184828415, |
|
"grad_norm": 0.5526972413063049, |
|
"learning_rate": 9.136654659961381e-07, |
|
"loss": 0.2931, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.2721252257676099, |
|
"grad_norm": 0.6518740057945251, |
|
"learning_rate": 9.12481269798956e-07, |
|
"loss": 0.2772, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.2733293196869356, |
|
"grad_norm": 0.5191295742988586, |
|
"learning_rate": 9.112897857899298e-07, |
|
"loss": 0.2933, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.27453341360626127, |
|
"grad_norm": 1.087936282157898, |
|
"learning_rate": 9.100910350204669e-07, |
|
"loss": 0.2956, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.275737507525587, |
|
"grad_norm": 0.5870952010154724, |
|
"learning_rate": 9.088850386703653e-07, |
|
"loss": 0.2857, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.2769416014449127, |
|
"grad_norm": 0.5123207569122314, |
|
"learning_rate": 9.076718180474399e-07, |
|
"loss": 0.3005, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.2781456953642384, |
|
"grad_norm": 0.47658002376556396, |
|
"learning_rate": 9.064513945871457e-07, |
|
"loss": 0.2889, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.27934978928356413, |
|
"grad_norm": 0.564738929271698, |
|
"learning_rate": 9.052237898521984e-07, |
|
"loss": 0.2929, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.2805538832028898, |
|
"grad_norm": 0.47116583585739136, |
|
"learning_rate": 9.03989025532195e-07, |
|
"loss": 0.2942, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.2817579771222155, |
|
"grad_norm": 0.5838178396224976, |
|
"learning_rate": 9.027471234432292e-07, |
|
"loss": 0.2883, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.28296207104154125, |
|
"grad_norm": 0.48679229617118835, |
|
"learning_rate": 9.014981055275059e-07, |
|
"loss": 0.29, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.28416616496086694, |
|
"grad_norm": 0.5863898992538452, |
|
"learning_rate": 9.00241993852955e-07, |
|
"loss": 0.2871, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.28537025888019263, |
|
"grad_norm": 0.5949921607971191, |
|
"learning_rate": 8.989788106128402e-07, |
|
"loss": 0.2927, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.2865743527995184, |
|
"grad_norm": 0.42538484930992126, |
|
"learning_rate": 8.977085781253668e-07, |
|
"loss": 0.2825, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.28777844671884406, |
|
"grad_norm": 0.5678000450134277, |
|
"learning_rate": 8.964313188332881e-07, |
|
"loss": 0.294, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.2889825406381698, |
|
"grad_norm": 0.5283777713775635, |
|
"learning_rate": 8.951470553035086e-07, |
|
"loss": 0.286, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.2901866345574955, |
|
"grad_norm": 0.8639681935310364, |
|
"learning_rate": 8.938558102266851e-07, |
|
"loss": 0.2971, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.2913907284768212, |
|
"grad_norm": 0.5353107452392578, |
|
"learning_rate": 8.925576064168261e-07, |
|
"loss": 0.3038, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.2925948223961469, |
|
"grad_norm": 0.5691916346549988, |
|
"learning_rate": 8.912524668108885e-07, |
|
"loss": 0.2901, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.2937989163154726, |
|
"grad_norm": 0.5999578833580017, |
|
"learning_rate": 8.899404144683724e-07, |
|
"loss": 0.2864, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.2950030102347983, |
|
"grad_norm": 0.6660271883010864, |
|
"learning_rate": 8.886214725709136e-07, |
|
"loss": 0.2866, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.29620710415412405, |
|
"grad_norm": 0.5501262545585632, |
|
"learning_rate": 8.872956644218742e-07, |
|
"loss": 0.2909, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.29741119807344973, |
|
"grad_norm": 0.44489532709121704, |
|
"learning_rate": 8.859630134459308e-07, |
|
"loss": 0.2869, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.2986152919927754, |
|
"grad_norm": 0.619097113609314, |
|
"learning_rate": 8.846235431886604e-07, |
|
"loss": 0.2782, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.29981938591210117, |
|
"grad_norm": 0.49712878465652466, |
|
"learning_rate": 8.832772773161251e-07, |
|
"loss": 0.2848, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.30102347983142685, |
|
"grad_norm": 0.46963346004486084, |
|
"learning_rate": 8.819242396144529e-07, |
|
"loss": 0.2915, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.30222757375075254, |
|
"grad_norm": 0.5881354212760925, |
|
"learning_rate": 8.805644539894181e-07, |
|
"loss": 0.2969, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.3034316676700783, |
|
"grad_norm": 0.5345028042793274, |
|
"learning_rate": 8.791979444660193e-07, |
|
"loss": 0.2985, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.304635761589404, |
|
"grad_norm": 0.5038124322891235, |
|
"learning_rate": 8.778247351880536e-07, |
|
"loss": 0.2931, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.30583985550872966, |
|
"grad_norm": 0.6723479628562927, |
|
"learning_rate": 8.764448504176919e-07, |
|
"loss": 0.2885, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.3070439494280554, |
|
"grad_norm": 0.474516361951828, |
|
"learning_rate": 8.750583145350483e-07, |
|
"loss": 0.2906, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.3082480433473811, |
|
"grad_norm": 0.509379506111145, |
|
"learning_rate": 8.736651520377507e-07, |
|
"loss": 0.2874, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.3094521372667068, |
|
"grad_norm": 0.9317507743835449, |
|
"learning_rate": 8.722653875405075e-07, |
|
"loss": 0.2891, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.3106562311860325, |
|
"grad_norm": 0.4634588360786438, |
|
"learning_rate": 8.708590457746727e-07, |
|
"loss": 0.284, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.3118603251053582, |
|
"grad_norm": 0.4674171209335327, |
|
"learning_rate": 8.694461515878088e-07, |
|
"loss": 0.2851, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.3130644190246839, |
|
"grad_norm": 0.4606451988220215, |
|
"learning_rate": 8.68026729943248e-07, |
|
"loss": 0.282, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.31426851294400965, |
|
"grad_norm": 0.5793256163597107, |
|
"learning_rate": 8.666008059196513e-07, |
|
"loss": 0.2852, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.31547260686333534, |
|
"grad_norm": 0.742026686668396, |
|
"learning_rate": 8.65168404710565e-07, |
|
"loss": 0.2909, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.316676700782661, |
|
"grad_norm": 0.469868928194046, |
|
"learning_rate": 8.637295516239757e-07, |
|
"loss": 0.2784, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.31788079470198677, |
|
"grad_norm": 0.6895257234573364, |
|
"learning_rate": 8.622842720818635e-07, |
|
"loss": 0.2849, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.31908488862131246, |
|
"grad_norm": 0.6843047142028809, |
|
"learning_rate": 8.608325916197524e-07, |
|
"loss": 0.2969, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.32028898254063815, |
|
"grad_norm": 2.822052240371704, |
|
"learning_rate": 8.593745358862592e-07, |
|
"loss": 0.2954, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.3214930764599639, |
|
"grad_norm": 0.5745678544044495, |
|
"learning_rate": 8.579101306426406e-07, |
|
"loss": 0.3005, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.3226971703792896, |
|
"grad_norm": 0.4625186026096344, |
|
"learning_rate": 8.564394017623378e-07, |
|
"loss": 0.2889, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.32390126429861527, |
|
"grad_norm": 0.5813141465187073, |
|
"learning_rate": 8.549623752305192e-07, |
|
"loss": 0.2926, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.325105358217941, |
|
"grad_norm": 0.49706658720970154, |
|
"learning_rate": 8.534790771436222e-07, |
|
"loss": 0.2884, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.3263094521372667, |
|
"grad_norm": 0.5477120280265808, |
|
"learning_rate": 8.519895337088907e-07, |
|
"loss": 0.2922, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.32751354605659244, |
|
"grad_norm": 1.157457709312439, |
|
"learning_rate": 8.504937712439131e-07, |
|
"loss": 0.2699, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.32871763997591813, |
|
"grad_norm": 0.5263344049453735, |
|
"learning_rate": 8.48991816176157e-07, |
|
"loss": 0.2888, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.3299217338952438, |
|
"grad_norm": 0.764481782913208, |
|
"learning_rate": 8.474836950425026e-07, |
|
"loss": 0.292, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.33112582781456956, |
|
"grad_norm": 0.5704035758972168, |
|
"learning_rate": 8.459694344887731e-07, |
|
"loss": 0.2928, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.33232992173389525, |
|
"grad_norm": 0.46473219990730286, |
|
"learning_rate": 8.444490612692645e-07, |
|
"loss": 0.2816, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.33353401565322094, |
|
"grad_norm": 0.5250662565231323, |
|
"learning_rate": 8.429226022462728e-07, |
|
"loss": 0.2881, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.3347381095725467, |
|
"grad_norm": 0.6085227727890015, |
|
"learning_rate": 8.413900843896193e-07, |
|
"loss": 0.3122, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.33594220349187237, |
|
"grad_norm": 0.7203246355056763, |
|
"learning_rate": 8.398515347761745e-07, |
|
"loss": 0.2911, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.33714629741119806, |
|
"grad_norm": 0.5305497050285339, |
|
"learning_rate": 8.383069805893784e-07, |
|
"loss": 0.2888, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.3383503913305238, |
|
"grad_norm": 0.5452449917793274, |
|
"learning_rate": 8.367564491187622e-07, |
|
"loss": 0.2866, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.3395544852498495, |
|
"grad_norm": 0.4815659523010254, |
|
"learning_rate": 8.351999677594645e-07, |
|
"loss": 0.2863, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.3407585791691752, |
|
"grad_norm": 0.5499128103256226, |
|
"learning_rate": 8.336375640117481e-07, |
|
"loss": 0.2865, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.3419626730885009, |
|
"grad_norm": 0.559804379940033, |
|
"learning_rate": 8.320692654805136e-07, |
|
"loss": 0.2833, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.3431667670078266, |
|
"grad_norm": 0.5070551633834839, |
|
"learning_rate": 8.304950998748124e-07, |
|
"loss": 0.2969, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.3443708609271523, |
|
"grad_norm": 0.5566725730895996, |
|
"learning_rate": 8.289150950073564e-07, |
|
"loss": 0.2814, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.34557495484647804, |
|
"grad_norm": 0.5421969890594482, |
|
"learning_rate": 8.273292787940268e-07, |
|
"loss": 0.2805, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.34677904876580373, |
|
"grad_norm": 0.49686506390571594, |
|
"learning_rate": 8.257376792533813e-07, |
|
"loss": 0.2872, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.3479831426851294, |
|
"grad_norm": 0.4665164649486542, |
|
"learning_rate": 8.241403245061584e-07, |
|
"loss": 0.2816, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.34918723660445516, |
|
"grad_norm": 0.4437556266784668, |
|
"learning_rate": 8.225372427747813e-07, |
|
"loss": 0.286, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.35039133052378085, |
|
"grad_norm": 0.5280335545539856, |
|
"learning_rate": 8.209284623828583e-07, |
|
"loss": 0.2895, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.35159542444310654, |
|
"grad_norm": 0.5298367142677307, |
|
"learning_rate": 8.193140117546832e-07, |
|
"loss": 0.282, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.3527995183624323, |
|
"grad_norm": 0.7123149633407593, |
|
"learning_rate": 8.176939194147329e-07, |
|
"loss": 0.2841, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.354003612281758, |
|
"grad_norm": 0.6565315127372742, |
|
"learning_rate": 8.160682139871632e-07, |
|
"loss": 0.2793, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.35520770620108366, |
|
"grad_norm": 0.7005172967910767, |
|
"learning_rate": 8.144369241953032e-07, |
|
"loss": 0.2854, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.3564118001204094, |
|
"grad_norm": 0.7468757033348083, |
|
"learning_rate": 8.128000788611478e-07, |
|
"loss": 0.2992, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.3576158940397351, |
|
"grad_norm": 0.5055456161499023, |
|
"learning_rate": 8.111577069048487e-07, |
|
"loss": 0.2979, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.3588199879590608, |
|
"grad_norm": 0.576806366443634, |
|
"learning_rate": 8.095098373442027e-07, |
|
"loss": 0.2915, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.3600240818783865, |
|
"grad_norm": 0.5598990321159363, |
|
"learning_rate": 8.078564992941401e-07, |
|
"loss": 0.2741, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.3612281757977122, |
|
"grad_norm": 0.5614596009254456, |
|
"learning_rate": 8.061977219662092e-07, |
|
"loss": 0.2913, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.3624322697170379, |
|
"grad_norm": 0.37974095344543457, |
|
"learning_rate": 8.045335346680611e-07, |
|
"loss": 0.2787, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.36363636363636365, |
|
"grad_norm": 0.6439441442489624, |
|
"learning_rate": 8.028639668029309e-07, |
|
"loss": 0.2868, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.36484045755568933, |
|
"grad_norm": 0.46323299407958984, |
|
"learning_rate": 8.011890478691196e-07, |
|
"loss": 0.2831, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.3660445514750151, |
|
"grad_norm": 0.4963575005531311, |
|
"learning_rate": 7.995088074594713e-07, |
|
"loss": 0.2782, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.36724864539434077, |
|
"grad_norm": 0.6179429888725281, |
|
"learning_rate": 7.978232752608516e-07, |
|
"loss": 0.2703, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.36845273931366646, |
|
"grad_norm": 0.5127160549163818, |
|
"learning_rate": 7.961324810536223e-07, |
|
"loss": 0.3007, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.3696568332329922, |
|
"grad_norm": 0.45177775621414185, |
|
"learning_rate": 7.94436454711116e-07, |
|
"loss": 0.288, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.3708609271523179, |
|
"grad_norm": 0.47144508361816406, |
|
"learning_rate": 7.927352261991074e-07, |
|
"loss": 0.2901, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.3720650210716436, |
|
"grad_norm": 0.5511527061462402, |
|
"learning_rate": 7.910288255752844e-07, |
|
"loss": 0.2754, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.3732691149909693, |
|
"grad_norm": 0.5164305567741394, |
|
"learning_rate": 7.893172829887171e-07, |
|
"loss": 0.2847, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.374473208910295, |
|
"grad_norm": 0.5629504919052124, |
|
"learning_rate": 7.876006286793251e-07, |
|
"loss": 0.2953, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.3756773028296207, |
|
"grad_norm": 0.513200044631958, |
|
"learning_rate": 7.858788929773422e-07, |
|
"loss": 0.2702, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.37688139674894644, |
|
"grad_norm": 0.504371166229248, |
|
"learning_rate": 7.841521063027825e-07, |
|
"loss": 0.2873, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.37808549066827213, |
|
"grad_norm": 0.613593578338623, |
|
"learning_rate": 7.824202991649013e-07, |
|
"loss": 0.27, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.3792895845875978, |
|
"grad_norm": 0.7345304489135742, |
|
"learning_rate": 7.806835021616564e-07, |
|
"loss": 0.2895, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.38049367850692356, |
|
"grad_norm": 0.48514464497566223, |
|
"learning_rate": 7.789417459791681e-07, |
|
"loss": 0.2809, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.38169777242624925, |
|
"grad_norm": 0.4638960063457489, |
|
"learning_rate": 7.77195061391176e-07, |
|
"loss": 0.2839, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.38290186634557494, |
|
"grad_norm": 0.5008341073989868, |
|
"learning_rate": 7.754434792584968e-07, |
|
"loss": 0.2701, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.3841059602649007, |
|
"grad_norm": 0.5258957743644714, |
|
"learning_rate": 7.73687030528477e-07, |
|
"loss": 0.2709, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.38531005418422637, |
|
"grad_norm": 0.5781968832015991, |
|
"learning_rate": 7.719257462344481e-07, |
|
"loss": 0.2994, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.38651414810355206, |
|
"grad_norm": 0.5485130548477173, |
|
"learning_rate": 7.701596574951771e-07, |
|
"loss": 0.3001, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.3877182420228778, |
|
"grad_norm": 0.4708418846130371, |
|
"learning_rate": 7.683887955143169e-07, |
|
"loss": 0.2736, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.3889223359422035, |
|
"grad_norm": 0.5321612358093262, |
|
"learning_rate": 7.666131915798556e-07, |
|
"loss": 0.2892, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.3901264298615292, |
|
"grad_norm": 0.524898111820221, |
|
"learning_rate": 7.648328770635623e-07, |
|
"loss": 0.2897, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.3913305237808549, |
|
"grad_norm": 0.4973953664302826, |
|
"learning_rate": 7.630478834204351e-07, |
|
"loss": 0.2804, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.3925346177001806, |
|
"grad_norm": 0.5439997315406799, |
|
"learning_rate": 7.612582421881423e-07, |
|
"loss": 0.2824, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.3937387116195063, |
|
"grad_norm": 0.5040695667266846, |
|
"learning_rate": 7.594639849864681e-07, |
|
"loss": 0.2806, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.39494280553883204, |
|
"grad_norm": 0.57867830991745, |
|
"learning_rate": 7.576651435167523e-07, |
|
"loss": 0.2788, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.39614689945815773, |
|
"grad_norm": 0.43785402178764343, |
|
"learning_rate": 7.558617495613304e-07, |
|
"loss": 0.272, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.3973509933774834, |
|
"grad_norm": 0.6042655110359192, |
|
"learning_rate": 7.540538349829725e-07, |
|
"loss": 0.2918, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.39855508729680916, |
|
"grad_norm": 0.6529451012611389, |
|
"learning_rate": 7.522414317243198e-07, |
|
"loss": 0.2882, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.39975918121613485, |
|
"grad_norm": 0.5043284296989441, |
|
"learning_rate": 7.50424571807321e-07, |
|
"loss": 0.2859, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.40096327513546054, |
|
"grad_norm": 0.44874584674835205, |
|
"learning_rate": 7.486032873326656e-07, |
|
"loss": 0.2912, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.4021673690547863, |
|
"grad_norm": 0.515211284160614, |
|
"learning_rate": 7.467776104792171e-07, |
|
"loss": 0.2747, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.40337146297411197, |
|
"grad_norm": 0.5425666570663452, |
|
"learning_rate": 7.449475735034453e-07, |
|
"loss": 0.2964, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.4045755568934377, |
|
"grad_norm": 0.5557084083557129, |
|
"learning_rate": 7.431132087388546e-07, |
|
"loss": 0.2809, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.4057796508127634, |
|
"grad_norm": 0.4438600540161133, |
|
"learning_rate": 7.412745485954144e-07, |
|
"loss": 0.269, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.4069837447320891, |
|
"grad_norm": 0.586608350276947, |
|
"learning_rate": 7.394316255589854e-07, |
|
"loss": 0.2848, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.40818783865141484, |
|
"grad_norm": 0.6429834961891174, |
|
"learning_rate": 7.375844721907466e-07, |
|
"loss": 0.2917, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.4093919325707405, |
|
"grad_norm": 0.5150188207626343, |
|
"learning_rate": 7.35733121126619e-07, |
|
"loss": 0.2772, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.4105960264900662, |
|
"grad_norm": 0.5537393093109131, |
|
"learning_rate": 7.338776050766896e-07, |
|
"loss": 0.2819, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.41180012040939196, |
|
"grad_norm": 0.4834784269332886, |
|
"learning_rate": 7.320179568246333e-07, |
|
"loss": 0.2851, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.41300421432871764, |
|
"grad_norm": 0.6806831955909729, |
|
"learning_rate": 7.301542092271337e-07, |
|
"loss": 0.2841, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.41420830824804333, |
|
"grad_norm": 0.5081019997596741, |
|
"learning_rate": 7.282863952133022e-07, |
|
"loss": 0.2763, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.4154124021673691, |
|
"grad_norm": 0.5681424140930176, |
|
"learning_rate": 7.264145477840974e-07, |
|
"loss": 0.2719, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.41661649608669477, |
|
"grad_norm": 0.6257504820823669, |
|
"learning_rate": 7.245387000117404e-07, |
|
"loss": 0.2813, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.41782059000602045, |
|
"grad_norm": 0.5195356607437134, |
|
"learning_rate": 7.226588850391317e-07, |
|
"loss": 0.2761, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.4190246839253462, |
|
"grad_norm": 0.5490323305130005, |
|
"learning_rate": 7.207751360792647e-07, |
|
"loss": 0.291, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.4202287778446719, |
|
"grad_norm": 0.6458017230033875, |
|
"learning_rate": 7.188874864146397e-07, |
|
"loss": 0.2919, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.4214328717639976, |
|
"grad_norm": 0.5081551671028137, |
|
"learning_rate": 7.16995969396676e-07, |
|
"loss": 0.2762, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.4226369656833233, |
|
"grad_norm": 0.6496263742446899, |
|
"learning_rate": 7.151006184451212e-07, |
|
"loss": 0.2766, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.423841059602649, |
|
"grad_norm": 0.6383594870567322, |
|
"learning_rate": 7.132014670474625e-07, |
|
"loss": 0.2829, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.4250451535219747, |
|
"grad_norm": 0.6374247074127197, |
|
"learning_rate": 7.112985487583333e-07, |
|
"loss": 0.2776, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.42624924744130044, |
|
"grad_norm": 0.48250874876976013, |
|
"learning_rate": 7.093918971989229e-07, |
|
"loss": 0.2794, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.4274533413606261, |
|
"grad_norm": 0.5055521726608276, |
|
"learning_rate": 7.07481546056379e-07, |
|
"loss": 0.2818, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.4286574352799518, |
|
"grad_norm": 0.558320164680481, |
|
"learning_rate": 7.055675290832157e-07, |
|
"loss": 0.29, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.42986152919927756, |
|
"grad_norm": 0.54196697473526, |
|
"learning_rate": 7.036498800967153e-07, |
|
"loss": 0.2819, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.43106562311860325, |
|
"grad_norm": 0.5442371368408203, |
|
"learning_rate": 7.017286329783314e-07, |
|
"loss": 0.3044, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.43226971703792894, |
|
"grad_norm": 0.531579315662384, |
|
"learning_rate": 6.9980382167309e-07, |
|
"loss": 0.2875, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.4334738109572547, |
|
"grad_norm": 0.6069034934043884, |
|
"learning_rate": 6.978754801889902e-07, |
|
"loss": 0.2915, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.43467790487658037, |
|
"grad_norm": 0.5376235246658325, |
|
"learning_rate": 6.959436425964033e-07, |
|
"loss": 0.2768, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.43588199879590606, |
|
"grad_norm": 0.5438763499259949, |
|
"learning_rate": 6.9400834302747e-07, |
|
"loss": 0.2911, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.4370860927152318, |
|
"grad_norm": 0.4325105547904968, |
|
"learning_rate": 6.920696156754985e-07, |
|
"loss": 0.269, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.4382901866345575, |
|
"grad_norm": 0.5107905864715576, |
|
"learning_rate": 6.901274947943597e-07, |
|
"loss": 0.2754, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.4394942805538832, |
|
"grad_norm": 0.5302306413650513, |
|
"learning_rate": 6.881820146978822e-07, |
|
"loss": 0.2835, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.4406983744732089, |
|
"grad_norm": 0.5489309430122375, |
|
"learning_rate": 6.862332097592457e-07, |
|
"loss": 0.2746, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.4419024683925346, |
|
"grad_norm": 0.4515032172203064, |
|
"learning_rate": 6.842811144103743e-07, |
|
"loss": 0.2829, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.44310656231186035, |
|
"grad_norm": 0.5359588861465454, |
|
"learning_rate": 6.823257631413275e-07, |
|
"loss": 0.2826, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.44431065623118604, |
|
"grad_norm": 0.49561506509780884, |
|
"learning_rate": 6.803671904996916e-07, |
|
"loss": 0.2946, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.44551475015051173, |
|
"grad_norm": 0.43841075897216797, |
|
"learning_rate": 6.784054310899683e-07, |
|
"loss": 0.2802, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.4467188440698375, |
|
"grad_norm": 0.7528261542320251, |
|
"learning_rate": 6.764405195729639e-07, |
|
"loss": 0.2829, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.44792293798916316, |
|
"grad_norm": 1.1440777778625488, |
|
"learning_rate": 6.744724906651774e-07, |
|
"loss": 0.2665, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.44912703190848885, |
|
"grad_norm": 0.5153807997703552, |
|
"learning_rate": 6.72501379138186e-07, |
|
"loss": 0.2754, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.4503311258278146, |
|
"grad_norm": 0.582036554813385, |
|
"learning_rate": 6.705272198180312e-07, |
|
"loss": 0.2818, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.4515352197471403, |
|
"grad_norm": 0.7196856737136841, |
|
"learning_rate": 6.685500475846044e-07, |
|
"loss": 0.2744, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.45273931366646597, |
|
"grad_norm": 1.0595272779464722, |
|
"learning_rate": 6.665698973710288e-07, |
|
"loss": 0.2602, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.4539434075857917, |
|
"grad_norm": 0.4910378158092499, |
|
"learning_rate": 6.645868041630439e-07, |
|
"loss": 0.2887, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.4551475015051174, |
|
"grad_norm": 0.4395122230052948, |
|
"learning_rate": 6.626008029983867e-07, |
|
"loss": 0.2771, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.4563515954244431, |
|
"grad_norm": 0.5630185008049011, |
|
"learning_rate": 6.606119289661721e-07, |
|
"loss": 0.2976, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.45755568934376883, |
|
"grad_norm": 0.6062456965446472, |
|
"learning_rate": 6.58620217206274e-07, |
|
"loss": 0.2707, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.4587597832630945, |
|
"grad_norm": 0.6882142424583435, |
|
"learning_rate": 6.566257029087039e-07, |
|
"loss": 0.2732, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.4599638771824202, |
|
"grad_norm": 0.4631926417350769, |
|
"learning_rate": 6.546284213129885e-07, |
|
"loss": 0.2794, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.46116797110174595, |
|
"grad_norm": 0.4465793967247009, |
|
"learning_rate": 6.526284077075488e-07, |
|
"loss": 0.2809, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.46237206502107164, |
|
"grad_norm": 0.5073222517967224, |
|
"learning_rate": 6.506256974290747e-07, |
|
"loss": 0.2908, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.46357615894039733, |
|
"grad_norm": 0.5717306137084961, |
|
"learning_rate": 6.486203258619016e-07, |
|
"loss": 0.282, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.4647802528597231, |
|
"grad_norm": 0.5614638924598694, |
|
"learning_rate": 6.466123284373858e-07, |
|
"loss": 0.2764, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.46598434677904876, |
|
"grad_norm": 0.626006007194519, |
|
"learning_rate": 6.446017406332772e-07, |
|
"loss": 0.277, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.46718844069837445, |
|
"grad_norm": 0.47509709000587463, |
|
"learning_rate": 6.425885979730933e-07, |
|
"loss": 0.2828, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.4683925346177002, |
|
"grad_norm": 0.5545176267623901, |
|
"learning_rate": 6.405729360254914e-07, |
|
"loss": 0.2893, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.4695966285370259, |
|
"grad_norm": 0.4888879060745239, |
|
"learning_rate": 6.3855479040364e-07, |
|
"loss": 0.2811, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.4708007224563516, |
|
"grad_norm": 0.44063079357147217, |
|
"learning_rate": 6.365341967645902e-07, |
|
"loss": 0.2782, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.4720048163756773, |
|
"grad_norm": 0.5356207489967346, |
|
"learning_rate": 6.345111908086444e-07, |
|
"loss": 0.2658, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.473208910295003, |
|
"grad_norm": 0.5134460926055908, |
|
"learning_rate": 6.324858082787275e-07, |
|
"loss": 0.2782, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.4744130042143287, |
|
"grad_norm": 0.5685980916023254, |
|
"learning_rate": 6.304580849597527e-07, |
|
"loss": 0.2704, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.47561709813365444, |
|
"grad_norm": 0.8610411286354065, |
|
"learning_rate": 6.284280566779923e-07, |
|
"loss": 0.29, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.4768211920529801, |
|
"grad_norm": 0.5496920943260193, |
|
"learning_rate": 6.263957593004421e-07, |
|
"loss": 0.2704, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.4780252859723058, |
|
"grad_norm": 0.4593532383441925, |
|
"learning_rate": 6.243612287341896e-07, |
|
"loss": 0.2806, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.47922937989163156, |
|
"grad_norm": 0.5178139805793762, |
|
"learning_rate": 6.223245009257783e-07, |
|
"loss": 0.2683, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.48043347381095725, |
|
"grad_norm": 0.6350088119506836, |
|
"learning_rate": 6.20285611860573e-07, |
|
"loss": 0.2796, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.481637567730283, |
|
"grad_norm": 0.4848230183124542, |
|
"learning_rate": 6.182445975621246e-07, |
|
"loss": 0.2727, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.4828416616496087, |
|
"grad_norm": 0.6039783358573914, |
|
"learning_rate": 6.162014940915323e-07, |
|
"loss": 0.295, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.48404575556893437, |
|
"grad_norm": 0.5623034834861755, |
|
"learning_rate": 6.141563375468082e-07, |
|
"loss": 0.2843, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.4852498494882601, |
|
"grad_norm": 0.5298231244087219, |
|
"learning_rate": 6.12109164062238e-07, |
|
"loss": 0.2685, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.4864539434075858, |
|
"grad_norm": 0.49439486861228943, |
|
"learning_rate": 6.100600098077431e-07, |
|
"loss": 0.2588, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.4876580373269115, |
|
"grad_norm": 0.4667768180370331, |
|
"learning_rate": 6.080089109882418e-07, |
|
"loss": 0.275, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.48886213124623723, |
|
"grad_norm": 0.5490863919258118, |
|
"learning_rate": 6.059559038430094e-07, |
|
"loss": 0.2837, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.4900662251655629, |
|
"grad_norm": 0.467192143201828, |
|
"learning_rate": 6.039010246450376e-07, |
|
"loss": 0.2733, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.4912703190848886, |
|
"grad_norm": 0.49663642048835754, |
|
"learning_rate": 6.018443097003945e-07, |
|
"loss": 0.2738, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.49247441300421435, |
|
"grad_norm": 0.501777708530426, |
|
"learning_rate": 5.997857953475823e-07, |
|
"loss": 0.2743, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.49367850692354004, |
|
"grad_norm": 0.5064652562141418, |
|
"learning_rate": 5.977255179568955e-07, |
|
"loss": 0.2748, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.4948826008428657, |
|
"grad_norm": 0.6248656511306763, |
|
"learning_rate": 5.956635139297783e-07, |
|
"loss": 0.2765, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.49608669476219147, |
|
"grad_norm": 0.45688706636428833, |
|
"learning_rate": 5.935998196981817e-07, |
|
"loss": 0.271, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.49729078868151716, |
|
"grad_norm": 0.7225250601768494, |
|
"learning_rate": 5.915344717239197e-07, |
|
"loss": 0.2853, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.49849488260084285, |
|
"grad_norm": 0.5863081812858582, |
|
"learning_rate": 5.894675064980246e-07, |
|
"loss": 0.2685, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.4996989765201686, |
|
"grad_norm": 0.5770187973976135, |
|
"learning_rate": 5.87398960540103e-07, |
|
"loss": 0.2774, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.5009030704394943, |
|
"grad_norm": 0.41943806409835815, |
|
"learning_rate": 5.8532887039769e-07, |
|
"loss": 0.2622, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.50210716435882, |
|
"grad_norm": 0.6374907493591309, |
|
"learning_rate": 5.832572726456039e-07, |
|
"loss": 0.2858, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.5033112582781457, |
|
"grad_norm": 0.5210843086242676, |
|
"learning_rate": 5.811842038852996e-07, |
|
"loss": 0.2706, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.5045153521974715, |
|
"grad_norm": 0.596387505531311, |
|
"learning_rate": 5.791097007442222e-07, |
|
"loss": 0.2823, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.5057194461167971, |
|
"grad_norm": 0.6676878929138184, |
|
"learning_rate": 5.7703379987516e-07, |
|
"loss": 0.2848, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.5069235400361228, |
|
"grad_norm": 0.6097555160522461, |
|
"learning_rate": 5.749565379555961e-07, |
|
"loss": 0.2766, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.5081276339554486, |
|
"grad_norm": 0.6043739318847656, |
|
"learning_rate": 5.728779516870615e-07, |
|
"loss": 0.2885, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.5093317278747742, |
|
"grad_norm": 0.5565124750137329, |
|
"learning_rate": 5.707980777944859e-07, |
|
"loss": 0.2643, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.5105358217941, |
|
"grad_norm": 0.49649959802627563, |
|
"learning_rate": 5.687169530255487e-07, |
|
"loss": 0.2672, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.5117399157134257, |
|
"grad_norm": 0.49968451261520386, |
|
"learning_rate": 5.666346141500307e-07, |
|
"loss": 0.2754, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.5129440096327513, |
|
"grad_norm": 0.4982677698135376, |
|
"learning_rate": 5.645510979591634e-07, |
|
"loss": 0.2785, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.5141481035520771, |
|
"grad_norm": 0.904083251953125, |
|
"learning_rate": 5.624664412649797e-07, |
|
"loss": 0.2833, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.5153521974714028, |
|
"grad_norm": 0.5038682222366333, |
|
"learning_rate": 5.603806808996625e-07, |
|
"loss": 0.2746, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.5165562913907285, |
|
"grad_norm": 0.7115175724029541, |
|
"learning_rate": 5.58293853714895e-07, |
|
"loss": 0.2712, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.5177603853100542, |
|
"grad_norm": 0.5522176027297974, |
|
"learning_rate": 5.562059965812097e-07, |
|
"loss": 0.2869, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.5189644792293799, |
|
"grad_norm": 0.6081178784370422, |
|
"learning_rate": 5.541171463873357e-07, |
|
"loss": 0.2751, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.5201685731487056, |
|
"grad_norm": 0.5689599514007568, |
|
"learning_rate": 5.52027340039548e-07, |
|
"loss": 0.2875, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.5213726670680313, |
|
"grad_norm": 0.43370601534843445, |
|
"learning_rate": 5.499366144610153e-07, |
|
"loss": 0.2673, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.5225767609873571, |
|
"grad_norm": 0.5115625262260437, |
|
"learning_rate": 5.478450065911473e-07, |
|
"loss": 0.2791, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.5237808549066827, |
|
"grad_norm": 0.518798291683197, |
|
"learning_rate": 5.45752553384942e-07, |
|
"loss": 0.277, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.5249849488260084, |
|
"grad_norm": 0.5628324151039124, |
|
"learning_rate": 5.436592918123337e-07, |
|
"loss": 0.2884, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.5261890427453342, |
|
"grad_norm": 0.47458890080451965, |
|
"learning_rate": 5.415652588575385e-07, |
|
"loss": 0.27, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.5273931366646598, |
|
"grad_norm": 0.6163709759712219, |
|
"learning_rate": 5.394704915184014e-07, |
|
"loss": 0.2643, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.5285972305839856, |
|
"grad_norm": 0.44985631108283997, |
|
"learning_rate": 5.373750268057431e-07, |
|
"loss": 0.2774, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.5298013245033113, |
|
"grad_norm": 0.47572416067123413, |
|
"learning_rate": 5.352789017427052e-07, |
|
"loss": 0.278, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.5310054184226369, |
|
"grad_norm": 0.5311432480812073, |
|
"learning_rate": 5.33182153364097e-07, |
|
"loss": 0.283, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.5322095123419627, |
|
"grad_norm": 0.5810163617134094, |
|
"learning_rate": 5.310848187157403e-07, |
|
"loss": 0.257, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.5334136062612884, |
|
"grad_norm": 0.8989514708518982, |
|
"learning_rate": 5.289869348538153e-07, |
|
"loss": 0.2846, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.534617700180614, |
|
"grad_norm": 0.4534051716327667, |
|
"learning_rate": 5.26888538844206e-07, |
|
"loss": 0.2836, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.5358217940999398, |
|
"grad_norm": 0.4670819938182831, |
|
"learning_rate": 5.247896677618452e-07, |
|
"loss": 0.2614, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.5370258880192655, |
|
"grad_norm": 0.5935913324356079, |
|
"learning_rate": 5.226903586900587e-07, |
|
"loss": 0.2826, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.5382299819385912, |
|
"grad_norm": 0.45839351415634155, |
|
"learning_rate": 5.205906487199119e-07, |
|
"loss": 0.2514, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.5394340758579169, |
|
"grad_norm": 0.4929831624031067, |
|
"learning_rate": 5.184905749495525e-07, |
|
"loss": 0.2815, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.5406381697772427, |
|
"grad_norm": 0.529437780380249, |
|
"learning_rate": 5.163901744835564e-07, |
|
"loss": 0.2744, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.5418422636965683, |
|
"grad_norm": 0.44370970129966736, |
|
"learning_rate": 5.14289484432271e-07, |
|
"loss": 0.2837, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.543046357615894, |
|
"grad_norm": 0.46680358052253723, |
|
"learning_rate": 5.121885419111611e-07, |
|
"loss": 0.2833, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.5442504515352198, |
|
"grad_norm": 0.5581067204475403, |
|
"learning_rate": 5.100873840401513e-07, |
|
"loss": 0.2846, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.5454545454545454, |
|
"grad_norm": 0.4683559238910675, |
|
"learning_rate": 5.079860479429718e-07, |
|
"loss": 0.2666, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.5466586393738712, |
|
"grad_norm": 0.464067280292511, |
|
"learning_rate": 5.058845707465009e-07, |
|
"loss": 0.2693, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.5478627332931969, |
|
"grad_norm": 0.5715063214302063, |
|
"learning_rate": 5.037829895801106e-07, |
|
"loss": 0.2746, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.5490668272125225, |
|
"grad_norm": 0.585356593132019, |
|
"learning_rate": 5.016813415750097e-07, |
|
"loss": 0.281, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.5502709211318483, |
|
"grad_norm": 0.4893047511577606, |
|
"learning_rate": 4.995796638635875e-07, |
|
"loss": 0.2799, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.551475015051174, |
|
"grad_norm": 1.0689632892608643, |
|
"learning_rate": 4.974779935787589e-07, |
|
"loss": 0.2574, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.5526791089704997, |
|
"grad_norm": 0.6054455637931824, |
|
"learning_rate": 4.953763678533068e-07, |
|
"loss": 0.2635, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.5538832028898254, |
|
"grad_norm": 0.46325477957725525, |
|
"learning_rate": 4.932748238192273e-07, |
|
"loss": 0.2769, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.5550872968091511, |
|
"grad_norm": 0.5770764350891113, |
|
"learning_rate": 4.911733986070735e-07, |
|
"loss": 0.2671, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.5562913907284768, |
|
"grad_norm": 0.5715611577033997, |
|
"learning_rate": 4.890721293452979e-07, |
|
"loss": 0.2917, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.5574954846478025, |
|
"grad_norm": 0.5384266972541809, |
|
"learning_rate": 4.869710531595988e-07, |
|
"loss": 0.2771, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.5586995785671283, |
|
"grad_norm": 0.4611688256263733, |
|
"learning_rate": 4.848702071722629e-07, |
|
"loss": 0.2828, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.5599036724864539, |
|
"grad_norm": 0.6118834018707275, |
|
"learning_rate": 4.827696285015094e-07, |
|
"loss": 0.2832, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.5611077664057796, |
|
"grad_norm": 0.5026919841766357, |
|
"learning_rate": 4.806693542608348e-07, |
|
"loss": 0.2735, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.5623118603251054, |
|
"grad_norm": 0.548273503780365, |
|
"learning_rate": 4.785694215583566e-07, |
|
"loss": 0.2742, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.563515954244431, |
|
"grad_norm": 0.6186013221740723, |
|
"learning_rate": 4.764698674961581e-07, |
|
"loss": 0.2784, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.5647200481637568, |
|
"grad_norm": 0.45300328731536865, |
|
"learning_rate": 4.743707291696329e-07, |
|
"loss": 0.2786, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.5659241420830825, |
|
"grad_norm": 0.49064886569976807, |
|
"learning_rate": 4.7227204366682873e-07, |
|
"loss": 0.2747, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.5671282360024081, |
|
"grad_norm": 0.5186241865158081, |
|
"learning_rate": 4.7017384806779336e-07, |
|
"loss": 0.2788, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.5683323299217339, |
|
"grad_norm": 0.5284368395805359, |
|
"learning_rate": 4.6807617944391843e-07, |
|
"loss": 0.264, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.5695364238410596, |
|
"grad_norm": 0.5770208239555359, |
|
"learning_rate": 4.6597907485728477e-07, |
|
"loss": 0.2759, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.5707405177603853, |
|
"grad_norm": 0.5039085149765015, |
|
"learning_rate": 4.6388257136000807e-07, |
|
"loss": 0.2807, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.571944611679711, |
|
"grad_norm": 1.2547776699066162, |
|
"learning_rate": 4.617867059935838e-07, |
|
"loss": 0.2651, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.5731487055990367, |
|
"grad_norm": 0.5457895398139954, |
|
"learning_rate": 4.5969151578823224e-07, |
|
"loss": 0.27, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.5743527995183624, |
|
"grad_norm": 0.4974658787250519, |
|
"learning_rate": 4.5759703776224555e-07, |
|
"loss": 0.2794, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.5755568934376881, |
|
"grad_norm": 0.5161871314048767, |
|
"learning_rate": 4.555033089213321e-07, |
|
"loss": 0.2816, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.5767609873570139, |
|
"grad_norm": 0.43015995621681213, |
|
"learning_rate": 4.534103662579642e-07, |
|
"loss": 0.267, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.5779650812763396, |
|
"grad_norm": 0.4864785969257355, |
|
"learning_rate": 4.5131824675072364e-07, |
|
"loss": 0.2793, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.5791691751956652, |
|
"grad_norm": 0.6006112694740295, |
|
"learning_rate": 4.492269873636482e-07, |
|
"loss": 0.2689, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.580373269114991, |
|
"grad_norm": 0.4434204399585724, |
|
"learning_rate": 4.4713662504557927e-07, |
|
"loss": 0.2876, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.5815773630343167, |
|
"grad_norm": 0.565077543258667, |
|
"learning_rate": 4.450471967295083e-07, |
|
"loss": 0.2658, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.5827814569536424, |
|
"grad_norm": 0.5381281971931458, |
|
"learning_rate": 4.429587393319246e-07, |
|
"loss": 0.2715, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.5839855508729681, |
|
"grad_norm": 0.49021026492118835, |
|
"learning_rate": 4.408712897521633e-07, |
|
"loss": 0.2688, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.5851896447922939, |
|
"grad_norm": 0.5293102264404297, |
|
"learning_rate": 4.3878488487175323e-07, |
|
"loss": 0.2604, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.5863937387116195, |
|
"grad_norm": 0.6353856921195984, |
|
"learning_rate": 4.3669956155376476e-07, |
|
"loss": 0.2586, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.5875978326309452, |
|
"grad_norm": 0.5306446552276611, |
|
"learning_rate": 4.3461535664215923e-07, |
|
"loss": 0.2624, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.588801926550271, |
|
"grad_norm": 0.5957462191581726, |
|
"learning_rate": 4.325323069611383e-07, |
|
"loss": 0.2731, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.5900060204695966, |
|
"grad_norm": 0.6803829073905945, |
|
"learning_rate": 4.3045044931449156e-07, |
|
"loss": 0.2779, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.5912101143889223, |
|
"grad_norm": 0.5501326322555542, |
|
"learning_rate": 4.2836982048494854e-07, |
|
"loss": 0.2675, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.5924142083082481, |
|
"grad_norm": 0.49481987953186035, |
|
"learning_rate": 4.262904572335272e-07, |
|
"loss": 0.2725, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.5936183022275737, |
|
"grad_norm": 0.5254814028739929, |
|
"learning_rate": 4.242123962988851e-07, |
|
"loss": 0.2804, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.5948223961468995, |
|
"grad_norm": 0.5598310232162476, |
|
"learning_rate": 4.2213567439667037e-07, |
|
"loss": 0.2703, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.5960264900662252, |
|
"grad_norm": 0.5715354681015015, |
|
"learning_rate": 4.200603282188724e-07, |
|
"loss": 0.2799, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.5972305839855508, |
|
"grad_norm": 0.6474336981773376, |
|
"learning_rate": 4.179863944331743e-07, |
|
"loss": 0.2799, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.5984346779048766, |
|
"grad_norm": 0.47116249799728394, |
|
"learning_rate": 4.15913909682305e-07, |
|
"loss": 0.2751, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.5996387718242023, |
|
"grad_norm": 0.5750442147254944, |
|
"learning_rate": 4.138429105833906e-07, |
|
"loss": 0.2719, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.600842865743528, |
|
"grad_norm": 0.5243822932243347, |
|
"learning_rate": 4.1177343372730923e-07, |
|
"loss": 0.2709, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.6020469596628537, |
|
"grad_norm": 0.5334904789924622, |
|
"learning_rate": 4.097055156780437e-07, |
|
"loss": 0.272, |
|
"step": 5000 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 8305, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1967389652549632.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|