|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 7.498581560283688, |
|
"eval_steps": 500, |
|
"global_step": 10573, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0070921985815602835, |
|
"grad_norm": 5.856144428253174, |
|
"learning_rate": 3.780718336483932e-06, |
|
"loss": 0.8655, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.014184397163120567, |
|
"grad_norm": 5.8862433433532715, |
|
"learning_rate": 7.561436672967864e-06, |
|
"loss": 0.7361, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02127659574468085, |
|
"grad_norm": 3.5518908500671387, |
|
"learning_rate": 1.1342155009451797e-05, |
|
"loss": 0.551, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.028368794326241134, |
|
"grad_norm": 3.55409836769104, |
|
"learning_rate": 1.5122873345935728e-05, |
|
"loss": 0.3709, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03546099290780142, |
|
"grad_norm": 2.7126119136810303, |
|
"learning_rate": 1.890359168241966e-05, |
|
"loss": 0.3237, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0425531914893617, |
|
"grad_norm": 2.9090919494628906, |
|
"learning_rate": 2.2684310018903593e-05, |
|
"loss": 0.2899, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04964539007092199, |
|
"grad_norm": 1.7755730152130127, |
|
"learning_rate": 2.646502835538752e-05, |
|
"loss": 0.1869, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05673758865248227, |
|
"grad_norm": 1.6960084438323975, |
|
"learning_rate": 3.0245746691871456e-05, |
|
"loss": 0.1864, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06382978723404255, |
|
"grad_norm": 1.7639371156692505, |
|
"learning_rate": 3.4026465028355385e-05, |
|
"loss": 0.1408, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07092198581560284, |
|
"grad_norm": 2.5512263774871826, |
|
"learning_rate": 3.780718336483932e-05, |
|
"loss": 0.1363, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07801418439716312, |
|
"grad_norm": 1.9627578258514404, |
|
"learning_rate": 4.158790170132325e-05, |
|
"loss": 0.129, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0851063829787234, |
|
"grad_norm": 0.9527886509895325, |
|
"learning_rate": 4.5368620037807186e-05, |
|
"loss": 0.1181, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.09219858156028368, |
|
"grad_norm": 3.0496978759765625, |
|
"learning_rate": 4.914933837429112e-05, |
|
"loss": 0.0903, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09929078014184398, |
|
"grad_norm": 1.1979912519454956, |
|
"learning_rate": 5.293005671077504e-05, |
|
"loss": 0.0906, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.10638297872340426, |
|
"grad_norm": 1.2007324695587158, |
|
"learning_rate": 5.671077504725898e-05, |
|
"loss": 0.0997, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11347517730496454, |
|
"grad_norm": 2.112391948699951, |
|
"learning_rate": 6.049149338374291e-05, |
|
"loss": 0.0903, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.12056737588652482, |
|
"grad_norm": 1.144476056098938, |
|
"learning_rate": 6.427221172022685e-05, |
|
"loss": 0.0853, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1276595744680851, |
|
"grad_norm": 0.9079101085662842, |
|
"learning_rate": 6.805293005671077e-05, |
|
"loss": 0.0813, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1347517730496454, |
|
"grad_norm": 2.0803258419036865, |
|
"learning_rate": 7.183364839319471e-05, |
|
"loss": 0.0929, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.14184397163120568, |
|
"grad_norm": 1.2291367053985596, |
|
"learning_rate": 7.561436672967865e-05, |
|
"loss": 0.0875, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.14893617021276595, |
|
"grad_norm": 1.3166685104370117, |
|
"learning_rate": 7.939508506616258e-05, |
|
"loss": 0.0903, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.15602836879432624, |
|
"grad_norm": 1.6771767139434814, |
|
"learning_rate": 8.31758034026465e-05, |
|
"loss": 0.0837, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.16312056737588654, |
|
"grad_norm": 1.185477375984192, |
|
"learning_rate": 8.695652173913044e-05, |
|
"loss": 0.0774, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.1702127659574468, |
|
"grad_norm": 0.8530003428459167, |
|
"learning_rate": 9.073724007561437e-05, |
|
"loss": 0.0767, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.1773049645390071, |
|
"grad_norm": 0.8007742762565613, |
|
"learning_rate": 9.45179584120983e-05, |
|
"loss": 0.0733, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.18439716312056736, |
|
"grad_norm": 0.5508751273155212, |
|
"learning_rate": 9.829867674858224e-05, |
|
"loss": 0.0853, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.19148936170212766, |
|
"grad_norm": 0.8425294756889343, |
|
"learning_rate": 0.00010207939508506617, |
|
"loss": 0.0725, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.19858156028368795, |
|
"grad_norm": 1.2945622205734253, |
|
"learning_rate": 0.00010586011342155009, |
|
"loss": 0.0879, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.20567375886524822, |
|
"grad_norm": 0.6478763818740845, |
|
"learning_rate": 0.00010964083175803403, |
|
"loss": 0.0553, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.2127659574468085, |
|
"grad_norm": 0.9865133166313171, |
|
"learning_rate": 0.00011342155009451796, |
|
"loss": 0.0793, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2198581560283688, |
|
"grad_norm": 1.046968936920166, |
|
"learning_rate": 0.00011720226843100191, |
|
"loss": 0.0825, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.22695035460992907, |
|
"grad_norm": 0.9418226480484009, |
|
"learning_rate": 0.00012098298676748583, |
|
"loss": 0.0793, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.23404255319148937, |
|
"grad_norm": 1.2901511192321777, |
|
"learning_rate": 0.00012476370510396974, |
|
"loss": 0.0753, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.24113475177304963, |
|
"grad_norm": 1.3087291717529297, |
|
"learning_rate": 0.0001285444234404537, |
|
"loss": 0.0615, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.24822695035460993, |
|
"grad_norm": 0.9991538524627686, |
|
"learning_rate": 0.00013232514177693763, |
|
"loss": 0.0626, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.2553191489361702, |
|
"grad_norm": 0.6831763386726379, |
|
"learning_rate": 0.00013610586011342154, |
|
"loss": 0.0626, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2624113475177305, |
|
"grad_norm": 0.7626124024391174, |
|
"learning_rate": 0.0001398865784499055, |
|
"loss": 0.0622, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2695035460992908, |
|
"grad_norm": 0.6531655192375183, |
|
"learning_rate": 0.00014366729678638943, |
|
"loss": 0.0607, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2765957446808511, |
|
"grad_norm": 0.8742074966430664, |
|
"learning_rate": 0.00014744801512287336, |
|
"loss": 0.0768, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.28368794326241137, |
|
"grad_norm": 0.8710255026817322, |
|
"learning_rate": 0.0001512287334593573, |
|
"loss": 0.0576, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2907801418439716, |
|
"grad_norm": 0.8089184761047363, |
|
"learning_rate": 0.0001550094517958412, |
|
"loss": 0.0659, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.2978723404255319, |
|
"grad_norm": 1.0766539573669434, |
|
"learning_rate": 0.00015879017013232515, |
|
"loss": 0.0748, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.3049645390070922, |
|
"grad_norm": 0.9432766437530518, |
|
"learning_rate": 0.0001625708884688091, |
|
"loss": 0.0659, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3120567375886525, |
|
"grad_norm": 0.7996474504470825, |
|
"learning_rate": 0.000166351606805293, |
|
"loss": 0.0659, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.3191489361702128, |
|
"grad_norm": 1.3181546926498413, |
|
"learning_rate": 0.00017013232514177695, |
|
"loss": 0.0658, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.3262411347517731, |
|
"grad_norm": 0.8984364867210388, |
|
"learning_rate": 0.00017391304347826088, |
|
"loss": 0.0596, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 1.0370538234710693, |
|
"learning_rate": 0.0001776937618147448, |
|
"loss": 0.066, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.3404255319148936, |
|
"grad_norm": 1.0649698972702026, |
|
"learning_rate": 0.00018147448015122874, |
|
"loss": 0.0597, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3475177304964539, |
|
"grad_norm": 0.5405861735343933, |
|
"learning_rate": 0.00018525519848771268, |
|
"loss": 0.0603, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.3546099290780142, |
|
"grad_norm": 0.8146863579750061, |
|
"learning_rate": 0.0001890359168241966, |
|
"loss": 0.0524, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3617021276595745, |
|
"grad_norm": 0.6537788510322571, |
|
"learning_rate": 0.00019281663516068054, |
|
"loss": 0.0564, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.36879432624113473, |
|
"grad_norm": 0.8714485764503479, |
|
"learning_rate": 0.00019659735349716447, |
|
"loss": 0.0525, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.375886524822695, |
|
"grad_norm": 0.5386486649513245, |
|
"learning_rate": 0.00019999999510833915, |
|
"loss": 0.0557, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.3829787234042553, |
|
"grad_norm": 0.6375821828842163, |
|
"learning_rate": 0.00019999940810961714, |
|
"loss": 0.0614, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.3900709219858156, |
|
"grad_norm": 0.6789309978485107, |
|
"learning_rate": 0.00019999784278530695, |
|
"loss": 0.0537, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.3971631205673759, |
|
"grad_norm": 0.8208333253860474, |
|
"learning_rate": 0.00019999529915072262, |
|
"loss": 0.0668, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.40425531914893614, |
|
"grad_norm": 0.6849876642227173, |
|
"learning_rate": 0.00019999177723074935, |
|
"loss": 0.0612, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.41134751773049644, |
|
"grad_norm": 0.690582811832428, |
|
"learning_rate": 0.00019998727705984316, |
|
"loss": 0.0652, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.41843971631205673, |
|
"grad_norm": 0.5250919461250305, |
|
"learning_rate": 0.00019998179868203068, |
|
"loss": 0.0596, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.425531914893617, |
|
"grad_norm": 0.6307488679885864, |
|
"learning_rate": 0.00019997534215090857, |
|
"loss": 0.057, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.4326241134751773, |
|
"grad_norm": 0.630332350730896, |
|
"learning_rate": 0.00019996790752964305, |
|
"loss": 0.066, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.4397163120567376, |
|
"grad_norm": 0.40226250886917114, |
|
"learning_rate": 0.00019995949489096945, |
|
"loss": 0.0555, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.44680851063829785, |
|
"grad_norm": 0.5462756752967834, |
|
"learning_rate": 0.00019995010431719118, |
|
"loss": 0.0507, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.45390070921985815, |
|
"grad_norm": 0.5090711116790771, |
|
"learning_rate": 0.00019993973590017922, |
|
"loss": 0.0458, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.46099290780141844, |
|
"grad_norm": 0.47854822874069214, |
|
"learning_rate": 0.00019992838974137103, |
|
"loss": 0.0459, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.46808510638297873, |
|
"grad_norm": 0.7866285443305969, |
|
"learning_rate": 0.00019991606595176964, |
|
"loss": 0.0585, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.475177304964539, |
|
"grad_norm": 0.8126336932182312, |
|
"learning_rate": 0.0001999027646519425, |
|
"loss": 0.0448, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.48226950354609927, |
|
"grad_norm": 0.5472203493118286, |
|
"learning_rate": 0.0001998884859720205, |
|
"loss": 0.0514, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.48936170212765956, |
|
"grad_norm": 0.6094168424606323, |
|
"learning_rate": 0.00019987323005169638, |
|
"loss": 0.0459, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.49645390070921985, |
|
"grad_norm": 0.5688044428825378, |
|
"learning_rate": 0.00019985699704022357, |
|
"loss": 0.053, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5035460992907801, |
|
"grad_norm": 0.469110906124115, |
|
"learning_rate": 0.00019983978709641481, |
|
"loss": 0.0524, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.5106382978723404, |
|
"grad_norm": 0.8332406282424927, |
|
"learning_rate": 0.00019982160038864032, |
|
"loss": 0.0507, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5177304964539007, |
|
"grad_norm": 0.8524491190910339, |
|
"learning_rate": 0.00019980243709482633, |
|
"loss": 0.0573, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.524822695035461, |
|
"grad_norm": 0.7200371026992798, |
|
"learning_rate": 0.00019978229740245343, |
|
"loss": 0.0502, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5319148936170213, |
|
"grad_norm": 0.4582567811012268, |
|
"learning_rate": 0.0001997611815085545, |
|
"loss": 0.0503, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.5390070921985816, |
|
"grad_norm": 0.5496141910552979, |
|
"learning_rate": 0.000199739089619713, |
|
"loss": 0.0493, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.5460992907801419, |
|
"grad_norm": 0.8712863326072693, |
|
"learning_rate": 0.0001997160219520608, |
|
"loss": 0.0469, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5531914893617021, |
|
"grad_norm": 0.7600995302200317, |
|
"learning_rate": 0.0001996919787312761, |
|
"loss": 0.0544, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.5602836879432624, |
|
"grad_norm": 0.6321051716804504, |
|
"learning_rate": 0.00019966696019258127, |
|
"loss": 0.0418, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.5673758865248227, |
|
"grad_norm": 0.5661709904670715, |
|
"learning_rate": 0.00019964096658074056, |
|
"loss": 0.0437, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.574468085106383, |
|
"grad_norm": 0.322308748960495, |
|
"learning_rate": 0.00019961399815005763, |
|
"loss": 0.0379, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.5815602836879432, |
|
"grad_norm": 0.5047584176063538, |
|
"learning_rate": 0.00019958605516437307, |
|
"loss": 0.0628, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.5886524822695035, |
|
"grad_norm": 0.45054513216018677, |
|
"learning_rate": 0.0001995571378970619, |
|
"loss": 0.0475, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.5957446808510638, |
|
"grad_norm": 0.7547653913497925, |
|
"learning_rate": 0.00019952724663103083, |
|
"loss": 0.0413, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6028368794326241, |
|
"grad_norm": 0.3875257670879364, |
|
"learning_rate": 0.00019949638165871547, |
|
"loss": 0.039, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6099290780141844, |
|
"grad_norm": 0.6647422313690186, |
|
"learning_rate": 0.00019946454328207753, |
|
"loss": 0.0559, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6170212765957447, |
|
"grad_norm": 0.3902786076068878, |
|
"learning_rate": 0.00019943173181260186, |
|
"loss": 0.0407, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.624113475177305, |
|
"grad_norm": 0.5156275033950806, |
|
"learning_rate": 0.00019939794757129332, |
|
"loss": 0.0443, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6312056737588653, |
|
"grad_norm": 0.5711575746536255, |
|
"learning_rate": 0.0001993631908886738, |
|
"loss": 0.0407, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.6382978723404256, |
|
"grad_norm": 0.33694812655448914, |
|
"learning_rate": 0.0001993274621047788, |
|
"loss": 0.0402, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.6453900709219859, |
|
"grad_norm": 0.869691014289856, |
|
"learning_rate": 0.00019929076156915425, |
|
"loss": 0.0506, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.6524822695035462, |
|
"grad_norm": 0.5810511112213135, |
|
"learning_rate": 0.00019925308964085297, |
|
"loss": 0.0537, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.6595744680851063, |
|
"grad_norm": 0.666899561882019, |
|
"learning_rate": 0.00019921444668843125, |
|
"loss": 0.0574, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 0.7992278337478638, |
|
"learning_rate": 0.00019917483308994527, |
|
"loss": 0.0385, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.6737588652482269, |
|
"grad_norm": 0.4858379662036896, |
|
"learning_rate": 0.00019913424923294722, |
|
"loss": 0.0473, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.6808510638297872, |
|
"grad_norm": 0.5866559147834778, |
|
"learning_rate": 0.0001990926955144818, |
|
"loss": 0.0465, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.6879432624113475, |
|
"grad_norm": 0.6053078770637512, |
|
"learning_rate": 0.000199050172341082, |
|
"loss": 0.0498, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.6950354609929078, |
|
"grad_norm": 0.5185685157775879, |
|
"learning_rate": 0.00019900668012876543, |
|
"loss": 0.0444, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.7021276595744681, |
|
"grad_norm": 0.6573126316070557, |
|
"learning_rate": 0.00019896221930303, |
|
"loss": 0.046, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.7092198581560284, |
|
"grad_norm": 0.6348971724510193, |
|
"learning_rate": 0.00019891679029884993, |
|
"loss": 0.0439, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7163120567375887, |
|
"grad_norm": 0.6452783346176147, |
|
"learning_rate": 0.00019887039356067146, |
|
"loss": 0.0607, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.723404255319149, |
|
"grad_norm": 0.6160244941711426, |
|
"learning_rate": 0.00019882302954240836, |
|
"loss": 0.0397, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.7304964539007093, |
|
"grad_norm": 0.5438401699066162, |
|
"learning_rate": 0.00019877469870743778, |
|
"loss": 0.0417, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.7375886524822695, |
|
"grad_norm": 0.5617598295211792, |
|
"learning_rate": 0.00019872540152859536, |
|
"loss": 0.0405, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.7446808510638298, |
|
"grad_norm": 0.4101731479167938, |
|
"learning_rate": 0.00019867513848817093, |
|
"loss": 0.0392, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.75177304964539, |
|
"grad_norm": 0.4860725998878479, |
|
"learning_rate": 0.00019862391007790354, |
|
"loss": 0.049, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.7588652482269503, |
|
"grad_norm": 0.3138566315174103, |
|
"learning_rate": 0.00019857171679897687, |
|
"loss": 0.0372, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.7659574468085106, |
|
"grad_norm": 0.5368508100509644, |
|
"learning_rate": 0.00019851855916201404, |
|
"loss": 0.0466, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.7730496453900709, |
|
"grad_norm": 0.5401434898376465, |
|
"learning_rate": 0.000198464437687073, |
|
"loss": 0.0489, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.7801418439716312, |
|
"grad_norm": 0.3905884623527527, |
|
"learning_rate": 0.00019840935290364105, |
|
"loss": 0.0389, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.7872340425531915, |
|
"grad_norm": 0.3689773976802826, |
|
"learning_rate": 0.00019835330535062994, |
|
"loss": 0.0432, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.7943262411347518, |
|
"grad_norm": 0.5504758954048157, |
|
"learning_rate": 0.0001982962955763705, |
|
"loss": 0.04, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.8014184397163121, |
|
"grad_norm": 0.3219192326068878, |
|
"learning_rate": 0.00019823832413860714, |
|
"loss": 0.0373, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.8085106382978723, |
|
"grad_norm": 0.5545524954795837, |
|
"learning_rate": 0.00019817939160449272, |
|
"loss": 0.0367, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.8156028368794326, |
|
"grad_norm": 0.5093657374382019, |
|
"learning_rate": 0.0001981194985505827, |
|
"loss": 0.0426, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.8226950354609929, |
|
"grad_norm": 0.4452705979347229, |
|
"learning_rate": 0.00019805864556282957, |
|
"loss": 0.0357, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.8297872340425532, |
|
"grad_norm": 0.5040962100028992, |
|
"learning_rate": 0.00019799683323657726, |
|
"loss": 0.0552, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.8368794326241135, |
|
"grad_norm": 0.5125325322151184, |
|
"learning_rate": 0.00019793406217655517, |
|
"loss": 0.0455, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.8439716312056738, |
|
"grad_norm": 0.5588168501853943, |
|
"learning_rate": 0.0001978703329968722, |
|
"loss": 0.0432, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.851063829787234, |
|
"grad_norm": 0.5096802711486816, |
|
"learning_rate": 0.00019780564632101096, |
|
"loss": 0.0488, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.8581560283687943, |
|
"grad_norm": 0.2617908716201782, |
|
"learning_rate": 0.00019774000278182147, |
|
"loss": 0.0454, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.8652482269503546, |
|
"grad_norm": 0.5790386199951172, |
|
"learning_rate": 0.00019767340302151513, |
|
"loss": 0.039, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.8723404255319149, |
|
"grad_norm": 0.9014220237731934, |
|
"learning_rate": 0.00019760584769165824, |
|
"loss": 0.0325, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.8794326241134752, |
|
"grad_norm": 0.41618219017982483, |
|
"learning_rate": 0.0001975373374531658, |
|
"loss": 0.0442, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.8865248226950354, |
|
"grad_norm": 0.4697210192680359, |
|
"learning_rate": 0.00019746787297629496, |
|
"loss": 0.0436, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.8936170212765957, |
|
"grad_norm": 0.4056944251060486, |
|
"learning_rate": 0.00019739745494063855, |
|
"loss": 0.0375, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.900709219858156, |
|
"grad_norm": 0.6047598123550415, |
|
"learning_rate": 0.00019732608403511822, |
|
"loss": 0.0323, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.9078014184397163, |
|
"grad_norm": 0.411379873752594, |
|
"learning_rate": 0.00019725376095797804, |
|
"loss": 0.0483, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.9148936170212766, |
|
"grad_norm": 0.2770962119102478, |
|
"learning_rate": 0.00019718048641677728, |
|
"loss": 0.0379, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.9219858156028369, |
|
"grad_norm": 0.6620250940322876, |
|
"learning_rate": 0.00019710626112838382, |
|
"loss": 0.042, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.9290780141843972, |
|
"grad_norm": 0.4887118339538574, |
|
"learning_rate": 0.0001970310858189669, |
|
"loss": 0.0419, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.9361702127659575, |
|
"grad_norm": 0.4877367615699768, |
|
"learning_rate": 0.0001969549612239902, |
|
"loss": 0.0386, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.9432624113475178, |
|
"grad_norm": 0.6139522790908813, |
|
"learning_rate": 0.00019687788808820452, |
|
"loss": 0.0411, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.950354609929078, |
|
"grad_norm": 0.628237247467041, |
|
"learning_rate": 0.0001967998671656405, |
|
"loss": 0.0532, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.9574468085106383, |
|
"grad_norm": 0.5345218777656555, |
|
"learning_rate": 0.00019672089921960137, |
|
"loss": 0.0414, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.9645390070921985, |
|
"grad_norm": 0.35509511828422546, |
|
"learning_rate": 0.00019664098502265525, |
|
"loss": 0.0464, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.9716312056737588, |
|
"grad_norm": 0.4468687176704407, |
|
"learning_rate": 0.00019656012535662786, |
|
"loss": 0.0395, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.9787234042553191, |
|
"grad_norm": 0.5955891013145447, |
|
"learning_rate": 0.0001964783210125946, |
|
"loss": 0.045, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.9858156028368794, |
|
"grad_norm": 0.48954424262046814, |
|
"learning_rate": 0.0001963955727908732, |
|
"loss": 0.0471, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.9929078014184397, |
|
"grad_norm": 0.450183242559433, |
|
"learning_rate": 0.00019631188150101534, |
|
"loss": 0.0366, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.41711631417274475, |
|
"learning_rate": 0.0001962272479617992, |
|
"loss": 0.0528, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.0070921985815602, |
|
"grad_norm": 0.3645707666873932, |
|
"learning_rate": 0.00019614167300122126, |
|
"loss": 0.043, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.0141843971631206, |
|
"grad_norm": 0.42011067271232605, |
|
"learning_rate": 0.00019605515745648822, |
|
"loss": 0.0402, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.0212765957446808, |
|
"grad_norm": 0.4155680537223816, |
|
"learning_rate": 0.0001959677021740088, |
|
"loss": 0.0392, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.0283687943262412, |
|
"grad_norm": 0.49862140417099, |
|
"learning_rate": 0.00019587930800938545, |
|
"loss": 0.0484, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.0354609929078014, |
|
"grad_norm": 0.42772340774536133, |
|
"learning_rate": 0.00019578997582740603, |
|
"loss": 0.0349, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.0425531914893618, |
|
"grad_norm": 0.40276241302490234, |
|
"learning_rate": 0.00019569970650203534, |
|
"loss": 0.0335, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.049645390070922, |
|
"grad_norm": 0.369478315114975, |
|
"learning_rate": 0.00019560850091640647, |
|
"loss": 0.0402, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.0567375886524824, |
|
"grad_norm": 0.7682146430015564, |
|
"learning_rate": 0.00019551635996281231, |
|
"loss": 0.0392, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.0638297872340425, |
|
"grad_norm": 0.619755744934082, |
|
"learning_rate": 0.0001954232845426967, |
|
"loss": 0.0426, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.070921985815603, |
|
"grad_norm": 0.3280376195907593, |
|
"learning_rate": 0.00019532927556664573, |
|
"loss": 0.0311, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.0780141843971631, |
|
"grad_norm": 0.42268431186676025, |
|
"learning_rate": 0.00019523433395437866, |
|
"loss": 0.0354, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.0851063829787233, |
|
"grad_norm": 0.34740039706230164, |
|
"learning_rate": 0.00019513846063473907, |
|
"loss": 0.0374, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.0921985815602837, |
|
"grad_norm": 0.36237838864326477, |
|
"learning_rate": 0.00019504165654568576, |
|
"loss": 0.028, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.099290780141844, |
|
"grad_norm": 0.4167526364326477, |
|
"learning_rate": 0.00019494392263428353, |
|
"loss": 0.0447, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.1063829787234043, |
|
"grad_norm": 0.5500767827033997, |
|
"learning_rate": 0.00019484525985669383, |
|
"loss": 0.0305, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.1134751773049645, |
|
"grad_norm": 0.4666343927383423, |
|
"learning_rate": 0.00019474566917816565, |
|
"loss": 0.0323, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.1205673758865249, |
|
"grad_norm": 0.43880197405815125, |
|
"learning_rate": 0.0001946451515730258, |
|
"loss": 0.0305, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.127659574468085, |
|
"grad_norm": 0.4338850677013397, |
|
"learning_rate": 0.00019454370802466953, |
|
"loss": 0.035, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.1347517730496455, |
|
"grad_norm": 0.4450472295284271, |
|
"learning_rate": 0.00019444133952555096, |
|
"loss": 0.0371, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.1418439716312057, |
|
"grad_norm": 0.38660725951194763, |
|
"learning_rate": 0.00019433804707717328, |
|
"loss": 0.0388, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.148936170212766, |
|
"grad_norm": 0.5273442268371582, |
|
"learning_rate": 0.0001942338316900788, |
|
"loss": 0.0392, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.1560283687943262, |
|
"grad_norm": 0.5786083340644836, |
|
"learning_rate": 0.00019412869438383945, |
|
"loss": 0.0409, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.1631205673758864, |
|
"grad_norm": 0.3235854506492615, |
|
"learning_rate": 0.00019402263618704642, |
|
"loss": 0.0325, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.1702127659574468, |
|
"grad_norm": 0.4242108166217804, |
|
"learning_rate": 0.0001939156581373004, |
|
"loss": 0.0295, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.177304964539007, |
|
"grad_norm": 0.518750011920929, |
|
"learning_rate": 0.00019380776128120116, |
|
"loss": 0.0412, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.1843971631205674, |
|
"grad_norm": 0.3119601607322693, |
|
"learning_rate": 0.00019369894667433754, |
|
"loss": 0.027, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.1914893617021276, |
|
"grad_norm": 0.3405968248844147, |
|
"learning_rate": 0.00019358921538127697, |
|
"loss": 0.036, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.198581560283688, |
|
"grad_norm": 0.62176913022995, |
|
"learning_rate": 0.00019347856847555512, |
|
"loss": 0.0527, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.2056737588652482, |
|
"grad_norm": 0.432858943939209, |
|
"learning_rate": 0.00019336700703966538, |
|
"loss": 0.0381, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.2127659574468086, |
|
"grad_norm": 0.23901493847370148, |
|
"learning_rate": 0.0001932545321650483, |
|
"loss": 0.0295, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.2198581560283688, |
|
"grad_norm": 0.3481459319591522, |
|
"learning_rate": 0.00019314114495208086, |
|
"loss": 0.0418, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.226950354609929, |
|
"grad_norm": 0.3281635344028473, |
|
"learning_rate": 0.00019302684651006574, |
|
"loss": 0.0282, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.2340425531914894, |
|
"grad_norm": 0.3726864755153656, |
|
"learning_rate": 0.00019291163795722048, |
|
"loss": 0.0384, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.2411347517730495, |
|
"grad_norm": 0.35322147607803345, |
|
"learning_rate": 0.00019279552042066652, |
|
"loss": 0.0267, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.24822695035461, |
|
"grad_norm": 0.3819480538368225, |
|
"learning_rate": 0.0001926784950364181, |
|
"loss": 0.0317, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.2553191489361701, |
|
"grad_norm": 0.5167778134346008, |
|
"learning_rate": 0.00019256056294937132, |
|
"loss": 0.0365, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.2624113475177305, |
|
"grad_norm": 0.2764042615890503, |
|
"learning_rate": 0.00019244172531329278, |
|
"loss": 0.03, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.2695035460992907, |
|
"grad_norm": 0.568465530872345, |
|
"learning_rate": 0.00019232198329080836, |
|
"loss": 0.042, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.2765957446808511, |
|
"grad_norm": 0.47287610173225403, |
|
"learning_rate": 0.00019220133805339184, |
|
"loss": 0.0431, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.2836879432624113, |
|
"grad_norm": 0.5068013072013855, |
|
"learning_rate": 0.00019207979078135346, |
|
"loss": 0.0326, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.2907801418439715, |
|
"grad_norm": 0.4016683101654053, |
|
"learning_rate": 0.00019195734266382828, |
|
"loss": 0.0327, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.297872340425532, |
|
"grad_norm": 0.21608802676200867, |
|
"learning_rate": 0.00019183399489876467, |
|
"loss": 0.0332, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.3049645390070923, |
|
"grad_norm": 0.5429771542549133, |
|
"learning_rate": 0.00019170974869291255, |
|
"loss": 0.0349, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.3120567375886525, |
|
"grad_norm": 0.3801655173301697, |
|
"learning_rate": 0.00019158460526181152, |
|
"loss": 0.0377, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.3191489361702127, |
|
"grad_norm": 0.3311924934387207, |
|
"learning_rate": 0.00019145856582977904, |
|
"loss": 0.0346, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.326241134751773, |
|
"grad_norm": 0.5218601226806641, |
|
"learning_rate": 0.0001913316316298984, |
|
"loss": 0.0302, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 0.37110060453414917, |
|
"learning_rate": 0.0001912038039040067, |
|
"loss": 0.0387, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.3404255319148937, |
|
"grad_norm": 0.46655604243278503, |
|
"learning_rate": 0.00019107508390268276, |
|
"loss": 0.0337, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.3475177304964538, |
|
"grad_norm": 0.33353927731513977, |
|
"learning_rate": 0.00019094547288523467, |
|
"loss": 0.0466, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.3546099290780143, |
|
"grad_norm": 0.44526827335357666, |
|
"learning_rate": 0.00019081497211968773, |
|
"loss": 0.0352, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.3617021276595744, |
|
"grad_norm": 0.4011070430278778, |
|
"learning_rate": 0.00019068358288277187, |
|
"loss": 0.0294, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.3687943262411348, |
|
"grad_norm": 0.3325769007205963, |
|
"learning_rate": 0.0001905513064599092, |
|
"loss": 0.0361, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.375886524822695, |
|
"grad_norm": 0.2585453987121582, |
|
"learning_rate": 0.0001904181441452015, |
|
"loss": 0.0302, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.3829787234042552, |
|
"grad_norm": 0.441824734210968, |
|
"learning_rate": 0.00019028409724141746, |
|
"loss": 0.0418, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.3900709219858156, |
|
"grad_norm": 0.41769495606422424, |
|
"learning_rate": 0.00019014916705998002, |
|
"loss": 0.0301, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.397163120567376, |
|
"grad_norm": 0.24308504164218903, |
|
"learning_rate": 0.00019001335492095347, |
|
"loss": 0.0353, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.4042553191489362, |
|
"grad_norm": 0.3190462589263916, |
|
"learning_rate": 0.00018987666215303058, |
|
"loss": 0.0385, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.4113475177304964, |
|
"grad_norm": 0.452863484621048, |
|
"learning_rate": 0.0001897390900935196, |
|
"loss": 0.0367, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.4184397163120568, |
|
"grad_norm": 0.3386947214603424, |
|
"learning_rate": 0.00018960064008833116, |
|
"loss": 0.0372, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.425531914893617, |
|
"grad_norm": 0.40042638778686523, |
|
"learning_rate": 0.0001894613134919651, |
|
"loss": 0.0354, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.4326241134751774, |
|
"grad_norm": 0.3207855820655823, |
|
"learning_rate": 0.00018932111166749724, |
|
"loss": 0.0304, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.4397163120567376, |
|
"grad_norm": 0.277885377407074, |
|
"learning_rate": 0.000189180035986566, |
|
"loss": 0.0303, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.4468085106382977, |
|
"grad_norm": 0.4140087366104126, |
|
"learning_rate": 0.00018903808782935904, |
|
"loss": 0.0344, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.4539007092198581, |
|
"grad_norm": 0.3237980306148529, |
|
"learning_rate": 0.00018889526858459975, |
|
"loss": 0.0317, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.4609929078014185, |
|
"grad_norm": 0.38879090547561646, |
|
"learning_rate": 0.00018875157964953358, |
|
"loss": 0.0344, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.4680851063829787, |
|
"grad_norm": 0.39974260330200195, |
|
"learning_rate": 0.0001886070224299145, |
|
"loss": 0.0339, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.475177304964539, |
|
"grad_norm": 0.3247739374637604, |
|
"learning_rate": 0.00018846159833999114, |
|
"loss": 0.0317, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.4822695035460993, |
|
"grad_norm": 0.5183299779891968, |
|
"learning_rate": 0.000188315308802493, |
|
"loss": 0.0262, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.4893617021276595, |
|
"grad_norm": 0.44669198989868164, |
|
"learning_rate": 0.00018816815524861654, |
|
"loss": 0.0323, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.49645390070922, |
|
"grad_norm": 0.5130909085273743, |
|
"learning_rate": 0.00018802013911801112, |
|
"loss": 0.0361, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.50354609929078, |
|
"grad_norm": 0.47592759132385254, |
|
"learning_rate": 0.00018787126185876502, |
|
"loss": 0.0423, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.5106382978723403, |
|
"grad_norm": 0.27714022994041443, |
|
"learning_rate": 0.0001877215249273912, |
|
"loss": 0.0398, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.5177304964539007, |
|
"grad_norm": 0.37813687324523926, |
|
"learning_rate": 0.00018757092978881302, |
|
"loss": 0.0285, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.524822695035461, |
|
"grad_norm": 0.2609386742115021, |
|
"learning_rate": 0.00018741947791634994, |
|
"loss": 0.0303, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.5319148936170213, |
|
"grad_norm": 0.36917203664779663, |
|
"learning_rate": 0.00018726717079170323, |
|
"loss": 0.0473, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.5390070921985815, |
|
"grad_norm": 0.18295632302761078, |
|
"learning_rate": 0.00018711400990494123, |
|
"loss": 0.0246, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.5460992907801419, |
|
"grad_norm": 0.2882087826728821, |
|
"learning_rate": 0.00018695999675448496, |
|
"loss": 0.0224, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.5531914893617023, |
|
"grad_norm": 0.35880571603775024, |
|
"learning_rate": 0.00018680513284709344, |
|
"loss": 0.0299, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.5602836879432624, |
|
"grad_norm": 0.4506542384624481, |
|
"learning_rate": 0.00018664941969784882, |
|
"loss": 0.0312, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.5673758865248226, |
|
"grad_norm": 0.3101454973220825, |
|
"learning_rate": 0.00018649285883014173, |
|
"loss": 0.036, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.574468085106383, |
|
"grad_norm": 0.3249278664588928, |
|
"learning_rate": 0.00018633545177565623, |
|
"loss": 0.0357, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.5815602836879432, |
|
"grad_norm": 0.23713338375091553, |
|
"learning_rate": 0.00018617720007435497, |
|
"loss": 0.0346, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.5886524822695036, |
|
"grad_norm": 0.2550758421421051, |
|
"learning_rate": 0.00018601810527446398, |
|
"loss": 0.0265, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.5957446808510638, |
|
"grad_norm": 0.41524937748908997, |
|
"learning_rate": 0.00018585816893245763, |
|
"loss": 0.0299, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.602836879432624, |
|
"grad_norm": 0.3899802565574646, |
|
"learning_rate": 0.00018569739261304328, |
|
"loss": 0.0361, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.6099290780141844, |
|
"grad_norm": 0.4563800096511841, |
|
"learning_rate": 0.00018553577788914618, |
|
"loss": 0.0358, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.6170212765957448, |
|
"grad_norm": 0.8882343769073486, |
|
"learning_rate": 0.00018537332634189384, |
|
"loss": 0.0419, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.624113475177305, |
|
"grad_norm": 0.5693446397781372, |
|
"learning_rate": 0.00018521003956060078, |
|
"loss": 0.0401, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.6312056737588652, |
|
"grad_norm": 0.3942001461982727, |
|
"learning_rate": 0.00018504591914275274, |
|
"loss": 0.035, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.6382978723404256, |
|
"grad_norm": 0.40895143151283264, |
|
"learning_rate": 0.00018488096669399133, |
|
"loss": 0.0292, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.645390070921986, |
|
"grad_norm": 0.43449512124061584, |
|
"learning_rate": 0.0001847151838280981, |
|
"loss": 0.0367, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.6524822695035462, |
|
"grad_norm": 0.4020148515701294, |
|
"learning_rate": 0.00018454857216697882, |
|
"loss": 0.0288, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.6595744680851063, |
|
"grad_norm": 0.4451272785663605, |
|
"learning_rate": 0.0001843811333406477, |
|
"loss": 0.0375, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 0.26918983459472656, |
|
"learning_rate": 0.00018421286898721127, |
|
"loss": 0.0362, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.673758865248227, |
|
"grad_norm": 0.3901691436767578, |
|
"learning_rate": 0.0001840437807528525, |
|
"loss": 0.0285, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.6808510638297873, |
|
"grad_norm": 0.3481754660606384, |
|
"learning_rate": 0.00018387387029181472, |
|
"loss": 0.0292, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.6879432624113475, |
|
"grad_norm": 0.40919333696365356, |
|
"learning_rate": 0.00018370313926638522, |
|
"loss": 0.0353, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.6950354609929077, |
|
"grad_norm": 0.24539948999881744, |
|
"learning_rate": 0.0001835315893468792, |
|
"loss": 0.0407, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.702127659574468, |
|
"grad_norm": 0.25068360567092896, |
|
"learning_rate": 0.00018335922221162336, |
|
"loss": 0.0313, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.7092198581560285, |
|
"grad_norm": 0.45453083515167236, |
|
"learning_rate": 0.00018318603954693948, |
|
"loss": 0.0328, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.7163120567375887, |
|
"grad_norm": 0.5006433129310608, |
|
"learning_rate": 0.0001830120430471279, |
|
"loss": 0.03, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.7234042553191489, |
|
"grad_norm": 0.3591817021369934, |
|
"learning_rate": 0.00018283723441445097, |
|
"loss": 0.0325, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.7304964539007093, |
|
"grad_norm": 0.6227660775184631, |
|
"learning_rate": 0.00018266161535911642, |
|
"loss": 0.032, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.7375886524822695, |
|
"grad_norm": 0.24666891992092133, |
|
"learning_rate": 0.00018248518759926053, |
|
"loss": 0.0388, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.7446808510638299, |
|
"grad_norm": 0.34065425395965576, |
|
"learning_rate": 0.0001823079528609315, |
|
"loss": 0.0331, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.75177304964539, |
|
"grad_norm": 0.340526819229126, |
|
"learning_rate": 0.00018212991287807232, |
|
"loss": 0.0297, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.7588652482269502, |
|
"grad_norm": 0.3198091983795166, |
|
"learning_rate": 0.00018195106939250408, |
|
"loss": 0.0337, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.7659574468085106, |
|
"grad_norm": 0.3750438988208771, |
|
"learning_rate": 0.00018177142415390867, |
|
"loss": 0.0341, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.773049645390071, |
|
"grad_norm": 0.3867601454257965, |
|
"learning_rate": 0.00018159097891981186, |
|
"loss": 0.0318, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.7801418439716312, |
|
"grad_norm": 0.37601733207702637, |
|
"learning_rate": 0.00018140973545556594, |
|
"loss": 0.0349, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.7872340425531914, |
|
"grad_norm": 0.3913877606391907, |
|
"learning_rate": 0.00018122769553433266, |
|
"loss": 0.0257, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.7943262411347518, |
|
"grad_norm": 0.30526429414749146, |
|
"learning_rate": 0.00018104486093706567, |
|
"loss": 0.0275, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.8014184397163122, |
|
"grad_norm": 0.6147098541259766, |
|
"learning_rate": 0.0001808612334524932, |
|
"loss": 0.0408, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.8085106382978724, |
|
"grad_norm": 0.3850766718387604, |
|
"learning_rate": 0.00018067681487710053, |
|
"loss": 0.0291, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.8156028368794326, |
|
"grad_norm": 0.4822274148464203, |
|
"learning_rate": 0.00018049160701511248, |
|
"loss": 0.0441, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.8226950354609928, |
|
"grad_norm": 0.3193504810333252, |
|
"learning_rate": 0.00018030561167847568, |
|
"loss": 0.0349, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.8297872340425532, |
|
"grad_norm": 0.2928631901741028, |
|
"learning_rate": 0.00018011883068684085, |
|
"loss": 0.0401, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.8368794326241136, |
|
"grad_norm": 0.3406616151332855, |
|
"learning_rate": 0.00017993126586754508, |
|
"loss": 0.031, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.8439716312056738, |
|
"grad_norm": 0.33067846298217773, |
|
"learning_rate": 0.00017974291905559382, |
|
"loss": 0.043, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.851063829787234, |
|
"grad_norm": 0.39099758863449097, |
|
"learning_rate": 0.00017955379209364303, |
|
"loss": 0.0315, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.8581560283687943, |
|
"grad_norm": 0.34113165736198425, |
|
"learning_rate": 0.00017936388683198112, |
|
"loss": 0.0328, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.8652482269503547, |
|
"grad_norm": 0.5652561783790588, |
|
"learning_rate": 0.0001791732051285109, |
|
"loss": 0.0356, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.872340425531915, |
|
"grad_norm": 0.38378405570983887, |
|
"learning_rate": 0.0001789817488487313, |
|
"loss": 0.0324, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.8794326241134751, |
|
"grad_norm": 0.34848251938819885, |
|
"learning_rate": 0.00017878951986571913, |
|
"loss": 0.0361, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.8865248226950353, |
|
"grad_norm": 0.34506094455718994, |
|
"learning_rate": 0.00017859652006011088, |
|
"loss": 0.0254, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.8936170212765957, |
|
"grad_norm": 0.26408687233924866, |
|
"learning_rate": 0.00017840275132008422, |
|
"loss": 0.0316, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.900709219858156, |
|
"grad_norm": 0.4670010805130005, |
|
"learning_rate": 0.0001782082155413395, |
|
"loss": 0.0274, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.9078014184397163, |
|
"grad_norm": 0.2338956594467163, |
|
"learning_rate": 0.00017801291462708134, |
|
"loss": 0.0227, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.9148936170212765, |
|
"grad_norm": 0.3896683156490326, |
|
"learning_rate": 0.00017781685048799984, |
|
"loss": 0.0311, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.9219858156028369, |
|
"grad_norm": 0.3847581446170807, |
|
"learning_rate": 0.000177620025042252, |
|
"loss": 0.0265, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.9290780141843973, |
|
"grad_norm": 0.47572168707847595, |
|
"learning_rate": 0.00017742244021544293, |
|
"loss": 0.0318, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.9361702127659575, |
|
"grad_norm": 0.256161630153656, |
|
"learning_rate": 0.00017722409794060693, |
|
"loss": 0.0228, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.9432624113475176, |
|
"grad_norm": 0.4607986807823181, |
|
"learning_rate": 0.00017702500015818876, |
|
"loss": 0.0289, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.950354609929078, |
|
"grad_norm": 0.26722726225852966, |
|
"learning_rate": 0.0001768251488160245, |
|
"loss": 0.0256, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.9574468085106385, |
|
"grad_norm": 0.3193138539791107, |
|
"learning_rate": 0.00017662454586932254, |
|
"loss": 0.0277, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.9645390070921986, |
|
"grad_norm": 0.3791826665401459, |
|
"learning_rate": 0.00017642319328064446, |
|
"loss": 0.029, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.9716312056737588, |
|
"grad_norm": 0.24767844378948212, |
|
"learning_rate": 0.0001762210930198858, |
|
"loss": 0.0243, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.978723404255319, |
|
"grad_norm": 0.2745870053768158, |
|
"learning_rate": 0.00017601824706425684, |
|
"loss": 0.0343, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.9858156028368794, |
|
"grad_norm": 0.3385297656059265, |
|
"learning_rate": 0.0001758146573982632, |
|
"loss": 0.0382, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.9929078014184398, |
|
"grad_norm": 0.5617783665657043, |
|
"learning_rate": 0.0001756103260136865, |
|
"loss": 0.0285, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.40970340371131897, |
|
"learning_rate": 0.0001754052549095648, |
|
"loss": 0.0259, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.00709219858156, |
|
"grad_norm": 0.27524593472480774, |
|
"learning_rate": 0.00017519944609217295, |
|
"loss": 0.0315, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.0141843971631204, |
|
"grad_norm": 0.42446693778038025, |
|
"learning_rate": 0.00017499290157500333, |
|
"loss": 0.0284, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.021276595744681, |
|
"grad_norm": 0.42036786675453186, |
|
"learning_rate": 0.00017478562337874568, |
|
"loss": 0.0337, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.028368794326241, |
|
"grad_norm": 0.5692403316497803, |
|
"learning_rate": 0.00017457761353126765, |
|
"loss": 0.0298, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.0354609929078014, |
|
"grad_norm": 0.470859557390213, |
|
"learning_rate": 0.00017436887406759488, |
|
"loss": 0.0343, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.0425531914893615, |
|
"grad_norm": 0.36999377608299255, |
|
"learning_rate": 0.00017415940702989103, |
|
"loss": 0.0305, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.049645390070922, |
|
"grad_norm": 0.4802422523498535, |
|
"learning_rate": 0.00017394921446743783, |
|
"loss": 0.0326, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 2.0567375886524824, |
|
"grad_norm": 0.26723712682724, |
|
"learning_rate": 0.0001737382984366151, |
|
"loss": 0.0294, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.0638297872340425, |
|
"grad_norm": 0.5008073449134827, |
|
"learning_rate": 0.00017352666100088051, |
|
"loss": 0.0364, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 2.0709219858156027, |
|
"grad_norm": 0.35637590289115906, |
|
"learning_rate": 0.0001733143042307496, |
|
"loss": 0.0272, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 2.078014184397163, |
|
"grad_norm": 0.35349592566490173, |
|
"learning_rate": 0.00017310123020377517, |
|
"loss": 0.0284, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 2.0851063829787235, |
|
"grad_norm": 0.401716023683548, |
|
"learning_rate": 0.00017288744100452737, |
|
"loss": 0.0334, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 2.0921985815602837, |
|
"grad_norm": 0.4555324912071228, |
|
"learning_rate": 0.000172672938724573, |
|
"loss": 0.0377, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 2.099290780141844, |
|
"grad_norm": 0.349065363407135, |
|
"learning_rate": 0.00017245772546245518, |
|
"loss": 0.0326, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 2.106382978723404, |
|
"grad_norm": 0.42119070887565613, |
|
"learning_rate": 0.00017224180332367275, |
|
"loss": 0.0325, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 2.1134751773049647, |
|
"grad_norm": 0.4668595492839813, |
|
"learning_rate": 0.00017202517442065974, |
|
"loss": 0.0275, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 2.120567375886525, |
|
"grad_norm": 0.29753541946411133, |
|
"learning_rate": 0.00017180784087276476, |
|
"loss": 0.0347, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 2.127659574468085, |
|
"grad_norm": 0.34885773062705994, |
|
"learning_rate": 0.00017158980480623003, |
|
"loss": 0.0427, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.1347517730496453, |
|
"grad_norm": 0.3699316680431366, |
|
"learning_rate": 0.00017137106835417084, |
|
"loss": 0.0302, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 2.141843971631206, |
|
"grad_norm": 0.40523409843444824, |
|
"learning_rate": 0.00017115163365655456, |
|
"loss": 0.0322, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 2.148936170212766, |
|
"grad_norm": 0.24978692829608917, |
|
"learning_rate": 0.00017093150286017964, |
|
"loss": 0.0321, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 2.1560283687943262, |
|
"grad_norm": 0.2739085853099823, |
|
"learning_rate": 0.00017071067811865476, |
|
"loss": 0.0302, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 2.1631205673758864, |
|
"grad_norm": 0.3967369794845581, |
|
"learning_rate": 0.00017048916159237768, |
|
"loss": 0.0328, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 2.1702127659574466, |
|
"grad_norm": 0.2717509865760803, |
|
"learning_rate": 0.00017026695544851403, |
|
"loss": 0.0253, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 2.1773049645390072, |
|
"grad_norm": 0.46482571959495544, |
|
"learning_rate": 0.0001700440618609763, |
|
"loss": 0.0265, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 2.1843971631205674, |
|
"grad_norm": 0.42749258875846863, |
|
"learning_rate": 0.00016982048301040237, |
|
"loss": 0.0362, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 2.1914893617021276, |
|
"grad_norm": 0.49205732345581055, |
|
"learning_rate": 0.00016959622108413428, |
|
"loss": 0.0332, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 2.198581560283688, |
|
"grad_norm": 0.3003978729248047, |
|
"learning_rate": 0.00016937127827619685, |
|
"loss": 0.0338, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.2056737588652484, |
|
"grad_norm": 0.41363534331321716, |
|
"learning_rate": 0.00016914565678727617, |
|
"loss": 0.0317, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 2.2127659574468086, |
|
"grad_norm": 0.25217387080192566, |
|
"learning_rate": 0.000168919358824698, |
|
"loss": 0.0292, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 2.219858156028369, |
|
"grad_norm": 0.26034829020500183, |
|
"learning_rate": 0.00016869238660240638, |
|
"loss": 0.0286, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 2.226950354609929, |
|
"grad_norm": 0.333636999130249, |
|
"learning_rate": 0.00016846474234094176, |
|
"loss": 0.0305, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 2.2340425531914896, |
|
"grad_norm": 0.45352891087532043, |
|
"learning_rate": 0.00016823642826741938, |
|
"loss": 0.0315, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.2411347517730498, |
|
"grad_norm": 0.2589890658855438, |
|
"learning_rate": 0.00016800744661550745, |
|
"loss": 0.0259, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 2.24822695035461, |
|
"grad_norm": 0.39509129524230957, |
|
"learning_rate": 0.00016777779962540534, |
|
"loss": 0.0331, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 2.25531914893617, |
|
"grad_norm": 0.42908716201782227, |
|
"learning_rate": 0.00016754748954382165, |
|
"loss": 0.0342, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 2.2624113475177303, |
|
"grad_norm": 0.28686532378196716, |
|
"learning_rate": 0.0001673165186239521, |
|
"loss": 0.0288, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 2.269503546099291, |
|
"grad_norm": 0.5497453808784485, |
|
"learning_rate": 0.0001670848891254577, |
|
"loss": 0.0361, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.276595744680851, |
|
"grad_norm": 0.4589519798755646, |
|
"learning_rate": 0.00016685260331444253, |
|
"loss": 0.0266, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 2.2836879432624113, |
|
"grad_norm": 0.4259999990463257, |
|
"learning_rate": 0.0001666196634634316, |
|
"loss": 0.0273, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 2.2907801418439715, |
|
"grad_norm": 0.4129974842071533, |
|
"learning_rate": 0.00016638607185134852, |
|
"loss": 0.029, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 2.297872340425532, |
|
"grad_norm": 0.3736423850059509, |
|
"learning_rate": 0.00016615183076349336, |
|
"loss": 0.0255, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 2.3049645390070923, |
|
"grad_norm": 0.38349997997283936, |
|
"learning_rate": 0.00016591694249152013, |
|
"loss": 0.026, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.3120567375886525, |
|
"grad_norm": 0.4713188409805298, |
|
"learning_rate": 0.0001656814093334146, |
|
"loss": 0.031, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 2.3191489361702127, |
|
"grad_norm": 0.5241472721099854, |
|
"learning_rate": 0.00016544523359347143, |
|
"loss": 0.0298, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 2.326241134751773, |
|
"grad_norm": 0.25673630833625793, |
|
"learning_rate": 0.0001652084175822721, |
|
"loss": 0.0277, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 2.3333333333333335, |
|
"grad_norm": 0.2788539528846741, |
|
"learning_rate": 0.0001649709636166619, |
|
"loss": 0.0293, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 2.3404255319148937, |
|
"grad_norm": 0.33457013964653015, |
|
"learning_rate": 0.00016473287401972756, |
|
"loss": 0.0331, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.347517730496454, |
|
"grad_norm": 0.240670844912529, |
|
"learning_rate": 0.0001644941511207742, |
|
"loss": 0.0223, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 2.354609929078014, |
|
"grad_norm": 0.20768260955810547, |
|
"learning_rate": 0.00016425479725530292, |
|
"loss": 0.0239, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 2.3617021276595747, |
|
"grad_norm": 0.4587162733078003, |
|
"learning_rate": 0.00016401481476498772, |
|
"loss": 0.0278, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 2.368794326241135, |
|
"grad_norm": 0.4795977473258972, |
|
"learning_rate": 0.00016377420599765255, |
|
"loss": 0.0263, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 2.375886524822695, |
|
"grad_norm": 0.20740842819213867, |
|
"learning_rate": 0.0001635329733072485, |
|
"loss": 0.0273, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.382978723404255, |
|
"grad_norm": 0.28621232509613037, |
|
"learning_rate": 0.0001632911190538307, |
|
"loss": 0.0289, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 2.3900709219858154, |
|
"grad_norm": 0.4048430621623993, |
|
"learning_rate": 0.00016304864560353518, |
|
"loss": 0.0337, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 2.397163120567376, |
|
"grad_norm": 0.3549022674560547, |
|
"learning_rate": 0.00016280555532855576, |
|
"loss": 0.021, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 2.404255319148936, |
|
"grad_norm": 0.3781268000602722, |
|
"learning_rate": 0.00016256185060712093, |
|
"loss": 0.0278, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 2.4113475177304964, |
|
"grad_norm": 0.372190922498703, |
|
"learning_rate": 0.00016231753382347047, |
|
"loss": 0.0255, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.4184397163120566, |
|
"grad_norm": 0.20026937127113342, |
|
"learning_rate": 0.00016207260736783203, |
|
"loss": 0.0261, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 2.425531914893617, |
|
"grad_norm": 0.24735766649246216, |
|
"learning_rate": 0.00016182707363639808, |
|
"loss": 0.0252, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 2.4326241134751774, |
|
"grad_norm": 0.36811962723731995, |
|
"learning_rate": 0.00016158093503130215, |
|
"loss": 0.0302, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 2.4397163120567376, |
|
"grad_norm": 0.41325879096984863, |
|
"learning_rate": 0.0001613341939605954, |
|
"loss": 0.0402, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 2.4468085106382977, |
|
"grad_norm": 0.3262059688568115, |
|
"learning_rate": 0.00016108685283822317, |
|
"loss": 0.027, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.453900709219858, |
|
"grad_norm": 0.34455767273902893, |
|
"learning_rate": 0.0001608389140840013, |
|
"loss": 0.0296, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 2.4609929078014185, |
|
"grad_norm": 0.42494380474090576, |
|
"learning_rate": 0.0001605903801235924, |
|
"loss": 0.0307, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 2.4680851063829787, |
|
"grad_norm": 0.289614200592041, |
|
"learning_rate": 0.00016034125338848222, |
|
"loss": 0.0274, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 2.475177304964539, |
|
"grad_norm": 0.5783960223197937, |
|
"learning_rate": 0.0001600915363159557, |
|
"loss": 0.0355, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 2.482269503546099, |
|
"grad_norm": 0.28593072295188904, |
|
"learning_rate": 0.00015984123134907345, |
|
"loss": 0.0291, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.4893617021276597, |
|
"grad_norm": 0.2484228014945984, |
|
"learning_rate": 0.00015959034093664738, |
|
"loss": 0.0324, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 2.49645390070922, |
|
"grad_norm": 0.35120323300361633, |
|
"learning_rate": 0.00015933886753321722, |
|
"loss": 0.0324, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 2.50354609929078, |
|
"grad_norm": 0.2902463376522064, |
|
"learning_rate": 0.0001590868135990261, |
|
"loss": 0.0265, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 2.5106382978723403, |
|
"grad_norm": 0.3429517149925232, |
|
"learning_rate": 0.0001588341815999968, |
|
"loss": 0.0376, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 2.5177304964539005, |
|
"grad_norm": 0.3964468240737915, |
|
"learning_rate": 0.0001585809740077074, |
|
"loss": 0.0263, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 2.524822695035461, |
|
"grad_norm": 0.1953706592321396, |
|
"learning_rate": 0.0001583271932993673, |
|
"loss": 0.0314, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 2.5319148936170213, |
|
"grad_norm": 0.35029205679893494, |
|
"learning_rate": 0.00015807284195779272, |
|
"loss": 0.0347, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 2.5390070921985815, |
|
"grad_norm": 0.229562446475029, |
|
"learning_rate": 0.0001578179224713827, |
|
"loss": 0.0281, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 2.546099290780142, |
|
"grad_norm": 0.379069447517395, |
|
"learning_rate": 0.00015756243733409456, |
|
"loss": 0.0296, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 2.5531914893617023, |
|
"grad_norm": 0.17396724224090576, |
|
"learning_rate": 0.00015730638904541957, |
|
"loss": 0.0252, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.5602836879432624, |
|
"grad_norm": 0.29691052436828613, |
|
"learning_rate": 0.00015704978011035845, |
|
"loss": 0.0292, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 2.5673758865248226, |
|
"grad_norm": 0.39194944500923157, |
|
"learning_rate": 0.000156792613039397, |
|
"loss": 0.0274, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 2.574468085106383, |
|
"grad_norm": 0.1824718415737152, |
|
"learning_rate": 0.00015653489034848125, |
|
"loss": 0.0252, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 2.581560283687943, |
|
"grad_norm": 0.22263765335083008, |
|
"learning_rate": 0.00015627661455899327, |
|
"loss": 0.0208, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 2.5886524822695036, |
|
"grad_norm": 0.2892332077026367, |
|
"learning_rate": 0.00015601778819772613, |
|
"loss": 0.027, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 2.595744680851064, |
|
"grad_norm": 0.2670251429080963, |
|
"learning_rate": 0.00015575841379685928, |
|
"loss": 0.023, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 2.602836879432624, |
|
"grad_norm": 0.2950701415538788, |
|
"learning_rate": 0.00015549849389393395, |
|
"loss": 0.0257, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 2.6099290780141846, |
|
"grad_norm": 0.3107167184352875, |
|
"learning_rate": 0.00015523803103182805, |
|
"loss": 0.0244, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 2.617021276595745, |
|
"grad_norm": 0.35476645827293396, |
|
"learning_rate": 0.00015497702775873156, |
|
"loss": 0.0229, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 2.624113475177305, |
|
"grad_norm": 0.37949270009994507, |
|
"learning_rate": 0.00015471548662812133, |
|
"loss": 0.029, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.631205673758865, |
|
"grad_norm": 0.39569422602653503, |
|
"learning_rate": 0.00015445341019873634, |
|
"loss": 0.0312, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 2.6382978723404253, |
|
"grad_norm": 0.45384731888771057, |
|
"learning_rate": 0.0001541908010345525, |
|
"loss": 0.0293, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 2.645390070921986, |
|
"grad_norm": 0.3378404676914215, |
|
"learning_rate": 0.0001539276617047577, |
|
"loss": 0.029, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 2.652482269503546, |
|
"grad_norm": 0.4775332510471344, |
|
"learning_rate": 0.00015366399478372662, |
|
"loss": 0.0294, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 2.6595744680851063, |
|
"grad_norm": 0.3015674352645874, |
|
"learning_rate": 0.0001533998028509954, |
|
"loss": 0.0315, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.6666666666666665, |
|
"grad_norm": 0.27968233823776245, |
|
"learning_rate": 0.00015313508849123668, |
|
"loss": 0.0273, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 2.673758865248227, |
|
"grad_norm": 0.40850046277046204, |
|
"learning_rate": 0.00015286985429423404, |
|
"loss": 0.0247, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 2.6808510638297873, |
|
"grad_norm": 0.2494577169418335, |
|
"learning_rate": 0.00015260410285485693, |
|
"loss": 0.0235, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 2.6879432624113475, |
|
"grad_norm": 0.24964429438114166, |
|
"learning_rate": 0.00015233783677303498, |
|
"loss": 0.0259, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 2.6950354609929077, |
|
"grad_norm": 0.28433412313461304, |
|
"learning_rate": 0.00015207105865373295, |
|
"loss": 0.0372, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.702127659574468, |
|
"grad_norm": 0.2023301124572754, |
|
"learning_rate": 0.0001518037711069248, |
|
"loss": 0.0275, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 2.7092198581560285, |
|
"grad_norm": 0.26986822485923767, |
|
"learning_rate": 0.0001515359767475685, |
|
"loss": 0.0284, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 2.7163120567375887, |
|
"grad_norm": 0.2430671602487564, |
|
"learning_rate": 0.00015126767819558022, |
|
"loss": 0.0241, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 2.723404255319149, |
|
"grad_norm": 0.2965194582939148, |
|
"learning_rate": 0.00015099887807580904, |
|
"loss": 0.029, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 2.7304964539007095, |
|
"grad_norm": 0.2736563980579376, |
|
"learning_rate": 0.00015072957901801076, |
|
"loss": 0.0264, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.7375886524822697, |
|
"grad_norm": 0.233638733625412, |
|
"learning_rate": 0.00015045978365682257, |
|
"loss": 0.0298, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 2.74468085106383, |
|
"grad_norm": 0.350392609834671, |
|
"learning_rate": 0.0001501894946317372, |
|
"loss": 0.0278, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 2.75177304964539, |
|
"grad_norm": 0.2875712215900421, |
|
"learning_rate": 0.00014991871458707698, |
|
"loss": 0.0302, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 2.7588652482269502, |
|
"grad_norm": 0.3458658754825592, |
|
"learning_rate": 0.000149647446171968, |
|
"loss": 0.0316, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 2.7659574468085104, |
|
"grad_norm": 0.2638990879058838, |
|
"learning_rate": 0.00014937569204031436, |
|
"loss": 0.0248, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.773049645390071, |
|
"grad_norm": 0.34576040506362915, |
|
"learning_rate": 0.00014910345485077197, |
|
"loss": 0.0211, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 2.780141843971631, |
|
"grad_norm": 0.3976755738258362, |
|
"learning_rate": 0.00014883073726672269, |
|
"loss": 0.0225, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 2.7872340425531914, |
|
"grad_norm": 0.36923620104789734, |
|
"learning_rate": 0.00014855754195624822, |
|
"loss": 0.0283, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 2.794326241134752, |
|
"grad_norm": 0.3352384865283966, |
|
"learning_rate": 0.00014828387159210397, |
|
"loss": 0.0334, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 2.801418439716312, |
|
"grad_norm": 0.30438244342803955, |
|
"learning_rate": 0.00014800972885169303, |
|
"loss": 0.0322, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.8085106382978724, |
|
"grad_norm": 0.3594920337200165, |
|
"learning_rate": 0.00014773511641703987, |
|
"loss": 0.0399, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 2.8156028368794326, |
|
"grad_norm": 0.31048035621643066, |
|
"learning_rate": 0.00014746003697476404, |
|
"loss": 0.0273, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 2.8226950354609928, |
|
"grad_norm": 0.40405237674713135, |
|
"learning_rate": 0.0001471844932160541, |
|
"loss": 0.0271, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 2.829787234042553, |
|
"grad_norm": 0.2698522210121155, |
|
"learning_rate": 0.00014690848783664108, |
|
"loss": 0.0241, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 2.8368794326241136, |
|
"grad_norm": 0.3636997640132904, |
|
"learning_rate": 0.00014663202353677222, |
|
"loss": 0.0233, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.8439716312056738, |
|
"grad_norm": 0.24219240248203278, |
|
"learning_rate": 0.00014635510302118452, |
|
"loss": 0.0228, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 2.851063829787234, |
|
"grad_norm": 0.309758722782135, |
|
"learning_rate": 0.00014607772899907824, |
|
"loss": 0.0217, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 2.8581560283687946, |
|
"grad_norm": 0.33416473865509033, |
|
"learning_rate": 0.0001457999041840906, |
|
"loss": 0.0236, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 2.8652482269503547, |
|
"grad_norm": 0.30040785670280457, |
|
"learning_rate": 0.00014552163129426875, |
|
"loss": 0.0273, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 2.872340425531915, |
|
"grad_norm": 0.35119491815567017, |
|
"learning_rate": 0.00014524291305204382, |
|
"loss": 0.0286, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.879432624113475, |
|
"grad_norm": 0.31673938035964966, |
|
"learning_rate": 0.00014496375218420383, |
|
"loss": 0.0292, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 2.8865248226950353, |
|
"grad_norm": 0.20967337489128113, |
|
"learning_rate": 0.00014468415142186708, |
|
"loss": 0.0298, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 2.8936170212765955, |
|
"grad_norm": 0.5264660120010376, |
|
"learning_rate": 0.0001444041135004556, |
|
"loss": 0.0216, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.900709219858156, |
|
"grad_norm": 0.3425130248069763, |
|
"learning_rate": 0.0001441236411596683, |
|
"loss": 0.0239, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 2.9078014184397163, |
|
"grad_norm": 0.2979462742805481, |
|
"learning_rate": 0.00014384273714345403, |
|
"loss": 0.0243, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.9148936170212765, |
|
"grad_norm": 0.2593551576137543, |
|
"learning_rate": 0.00014356140419998493, |
|
"loss": 0.0236, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 2.921985815602837, |
|
"grad_norm": 0.41503509879112244, |
|
"learning_rate": 0.0001432796450816295, |
|
"loss": 0.0235, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 2.9290780141843973, |
|
"grad_norm": 0.30138614773750305, |
|
"learning_rate": 0.00014299746254492555, |
|
"loss": 0.0209, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 2.9361702127659575, |
|
"grad_norm": 0.34066081047058105, |
|
"learning_rate": 0.00014271485935055347, |
|
"loss": 0.022, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 2.9432624113475176, |
|
"grad_norm": 0.45455053448677063, |
|
"learning_rate": 0.00014243183826330894, |
|
"loss": 0.0303, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.950354609929078, |
|
"grad_norm": 0.19702021777629852, |
|
"learning_rate": 0.00014214840205207605, |
|
"loss": 0.0226, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 2.9574468085106385, |
|
"grad_norm": 0.3477088212966919, |
|
"learning_rate": 0.0001418645534898002, |
|
"loss": 0.0233, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 2.9645390070921986, |
|
"grad_norm": 0.349435955286026, |
|
"learning_rate": 0.00014158029535346096, |
|
"loss": 0.0252, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 2.971631205673759, |
|
"grad_norm": 0.1625533252954483, |
|
"learning_rate": 0.00014129563042404483, |
|
"loss": 0.0217, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 2.978723404255319, |
|
"grad_norm": 0.19161160290241241, |
|
"learning_rate": 0.00014101056148651823, |
|
"loss": 0.0173, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.9858156028368796, |
|
"grad_norm": 0.22051677107810974, |
|
"learning_rate": 0.00014072509132979994, |
|
"loss": 0.0263, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 2.99290780141844, |
|
"grad_norm": 0.3433932960033417, |
|
"learning_rate": 0.0001404392227467341, |
|
"loss": 0.02, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.5353679060935974, |
|
"learning_rate": 0.0001401529585340628, |
|
"loss": 0.0218, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 3.00709219858156, |
|
"grad_norm": 0.2597915232181549, |
|
"learning_rate": 0.0001398663014923986, |
|
"loss": 0.0175, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 3.0141843971631204, |
|
"grad_norm": 0.38695409893989563, |
|
"learning_rate": 0.00013957925442619737, |
|
"loss": 0.0281, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 3.021276595744681, |
|
"grad_norm": 0.33511126041412354, |
|
"learning_rate": 0.00013929182014373054, |
|
"loss": 0.0211, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 3.028368794326241, |
|
"grad_norm": 0.2662104368209839, |
|
"learning_rate": 0.00013900400145705794, |
|
"loss": 0.0252, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 3.0354609929078014, |
|
"grad_norm": 0.4052877128124237, |
|
"learning_rate": 0.00013871580118200006, |
|
"loss": 0.0245, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 3.0425531914893615, |
|
"grad_norm": 0.428830623626709, |
|
"learning_rate": 0.0001384272221381107, |
|
"loss": 0.0396, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 3.049645390070922, |
|
"grad_norm": 0.20592275261878967, |
|
"learning_rate": 0.0001381382671486491, |
|
"loss": 0.0195, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 3.0567375886524824, |
|
"grad_norm": 0.29792851209640503, |
|
"learning_rate": 0.00013784893904055266, |
|
"loss": 0.0221, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 3.0638297872340425, |
|
"grad_norm": 0.3490282893180847, |
|
"learning_rate": 0.00013755924064440904, |
|
"loss": 0.0255, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 3.0709219858156027, |
|
"grad_norm": 0.245464026927948, |
|
"learning_rate": 0.00013726917479442855, |
|
"loss": 0.0239, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 3.078014184397163, |
|
"grad_norm": 0.28532829880714417, |
|
"learning_rate": 0.00013697874432841637, |
|
"loss": 0.0206, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 3.0851063829787235, |
|
"grad_norm": 0.45370057225227356, |
|
"learning_rate": 0.00013668795208774496, |
|
"loss": 0.0261, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 3.0921985815602837, |
|
"grad_norm": 0.39404305815696716, |
|
"learning_rate": 0.00013639680091732603, |
|
"loss": 0.0244, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 3.099290780141844, |
|
"grad_norm": 0.3023253083229065, |
|
"learning_rate": 0.00013610529366558282, |
|
"loss": 0.0237, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 3.106382978723404, |
|
"grad_norm": 0.22088029980659485, |
|
"learning_rate": 0.00013581343318442226, |
|
"loss": 0.0185, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 3.1134751773049647, |
|
"grad_norm": 0.28878822922706604, |
|
"learning_rate": 0.00013552122232920707, |
|
"loss": 0.0264, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 3.120567375886525, |
|
"grad_norm": 0.2687053084373474, |
|
"learning_rate": 0.00013522866395872758, |
|
"loss": 0.0206, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 3.127659574468085, |
|
"grad_norm": 0.32471123337745667, |
|
"learning_rate": 0.00013493576093517434, |
|
"loss": 0.0211, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 3.1347517730496453, |
|
"grad_norm": 0.19945155084133148, |
|
"learning_rate": 0.00013464251612410936, |
|
"loss": 0.0225, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 3.141843971631206, |
|
"grad_norm": 0.203238844871521, |
|
"learning_rate": 0.00013434893239443877, |
|
"loss": 0.019, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 3.148936170212766, |
|
"grad_norm": 0.2069423794746399, |
|
"learning_rate": 0.00013405501261838423, |
|
"loss": 0.0246, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 3.1560283687943262, |
|
"grad_norm": 0.3187784254550934, |
|
"learning_rate": 0.00013376075967145524, |
|
"loss": 0.0222, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 3.1631205673758864, |
|
"grad_norm": 0.31714534759521484, |
|
"learning_rate": 0.00013346617643242062, |
|
"loss": 0.0246, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 3.1702127659574466, |
|
"grad_norm": 0.4024612605571747, |
|
"learning_rate": 0.00013317126578328065, |
|
"loss": 0.0282, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 3.1773049645390072, |
|
"grad_norm": 0.2583388388156891, |
|
"learning_rate": 0.00013287603060923876, |
|
"loss": 0.0184, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 3.1843971631205674, |
|
"grad_norm": 0.28229662775993347, |
|
"learning_rate": 0.00013258047379867334, |
|
"loss": 0.0259, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 3.1914893617021276, |
|
"grad_norm": 0.2962813973426819, |
|
"learning_rate": 0.00013228459824310936, |
|
"loss": 0.0327, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.198581560283688, |
|
"grad_norm": 0.2835753858089447, |
|
"learning_rate": 0.00013198840683719022, |
|
"loss": 0.0174, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 3.2056737588652484, |
|
"grad_norm": 0.3171481788158417, |
|
"learning_rate": 0.00013169190247864943, |
|
"loss": 0.0315, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 3.2127659574468086, |
|
"grad_norm": 0.3521289527416229, |
|
"learning_rate": 0.0001313950880682821, |
|
"loss": 0.0209, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 3.219858156028369, |
|
"grad_norm": 0.3271763324737549, |
|
"learning_rate": 0.00013109796650991683, |
|
"loss": 0.0183, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 3.226950354609929, |
|
"grad_norm": 0.29443103075027466, |
|
"learning_rate": 0.00013080054071038698, |
|
"loss": 0.0285, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 3.2340425531914896, |
|
"grad_norm": 0.35706964135169983, |
|
"learning_rate": 0.00013050281357950255, |
|
"loss": 0.026, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 3.2411347517730498, |
|
"grad_norm": 0.3586270809173584, |
|
"learning_rate": 0.00013020478803002142, |
|
"loss": 0.025, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 3.24822695035461, |
|
"grad_norm": 0.2771762013435364, |
|
"learning_rate": 0.00012990646697762107, |
|
"loss": 0.0222, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 3.25531914893617, |
|
"grad_norm": 0.22596819698810577, |
|
"learning_rate": 0.00012960785334087, |
|
"loss": 0.0211, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 3.2624113475177303, |
|
"grad_norm": 0.32333022356033325, |
|
"learning_rate": 0.00012930895004119907, |
|
"loss": 0.0189, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 3.269503546099291, |
|
"grad_norm": 0.35000088810920715, |
|
"learning_rate": 0.00012900976000287313, |
|
"loss": 0.0284, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 3.276595744680851, |
|
"grad_norm": 0.3429652154445648, |
|
"learning_rate": 0.00012871028615296212, |
|
"loss": 0.0307, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 3.2836879432624113, |
|
"grad_norm": 0.2919664978981018, |
|
"learning_rate": 0.00012841053142131272, |
|
"loss": 0.0226, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 3.2907801418439715, |
|
"grad_norm": 0.30846384167671204, |
|
"learning_rate": 0.00012811049874051955, |
|
"loss": 0.0199, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 3.297872340425532, |
|
"grad_norm": 0.4330957531929016, |
|
"learning_rate": 0.00012781019104589645, |
|
"loss": 0.0283, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 3.3049645390070923, |
|
"grad_norm": 0.2327142059803009, |
|
"learning_rate": 0.0001275096112754478, |
|
"loss": 0.0295, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 3.3120567375886525, |
|
"grad_norm": 0.32682859897613525, |
|
"learning_rate": 0.00012720876236983988, |
|
"loss": 0.0364, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 3.3191489361702127, |
|
"grad_norm": 0.25689056515693665, |
|
"learning_rate": 0.00012690764727237193, |
|
"loss": 0.02, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 3.326241134751773, |
|
"grad_norm": 0.275511234998703, |
|
"learning_rate": 0.0001266062689289474, |
|
"loss": 0.0307, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 0.3178229331970215, |
|
"learning_rate": 0.00012630463028804513, |
|
"loss": 0.0269, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 3.3404255319148937, |
|
"grad_norm": 0.2833107113838196, |
|
"learning_rate": 0.00012600273430069073, |
|
"loss": 0.0247, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 3.347517730496454, |
|
"grad_norm": 0.3253328800201416, |
|
"learning_rate": 0.0001257005839204273, |
|
"loss": 0.0217, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 3.354609929078014, |
|
"grad_norm": 0.26066187024116516, |
|
"learning_rate": 0.00012539818210328683, |
|
"loss": 0.0201, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 3.3617021276595747, |
|
"grad_norm": 0.24645408987998962, |
|
"learning_rate": 0.0001250955318077612, |
|
"loss": 0.0315, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 3.368794326241135, |
|
"grad_norm": 0.3252389132976532, |
|
"learning_rate": 0.00012479263599477318, |
|
"loss": 0.0203, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 3.375886524822695, |
|
"grad_norm": 0.21474260091781616, |
|
"learning_rate": 0.00012448949762764762, |
|
"loss": 0.0207, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 3.382978723404255, |
|
"grad_norm": 0.22793646156787872, |
|
"learning_rate": 0.00012418611967208223, |
|
"loss": 0.0211, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 3.3900709219858154, |
|
"grad_norm": 0.2615390419960022, |
|
"learning_rate": 0.00012388250509611876, |
|
"loss": 0.0344, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 3.397163120567376, |
|
"grad_norm": 0.4156443476676941, |
|
"learning_rate": 0.00012357865687011389, |
|
"loss": 0.0299, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 3.404255319148936, |
|
"grad_norm": 0.49900200963020325, |
|
"learning_rate": 0.00012327457796671015, |
|
"loss": 0.0245, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 3.4113475177304964, |
|
"grad_norm": 0.37122806906700134, |
|
"learning_rate": 0.00012297027136080687, |
|
"loss": 0.0276, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 3.4184397163120566, |
|
"grad_norm": 0.28465649485588074, |
|
"learning_rate": 0.00012266574002953108, |
|
"loss": 0.0196, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 3.425531914893617, |
|
"grad_norm": 0.24766407907009125, |
|
"learning_rate": 0.00012236098695220831, |
|
"loss": 0.0256, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 3.4326241134751774, |
|
"grad_norm": 0.27211466431617737, |
|
"learning_rate": 0.0001220560151103336, |
|
"loss": 0.0284, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 3.4397163120567376, |
|
"grad_norm": 0.2607908546924591, |
|
"learning_rate": 0.00012175082748754212, |
|
"loss": 0.0213, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 3.4468085106382977, |
|
"grad_norm": 0.22450798749923706, |
|
"learning_rate": 0.0001214454270695802, |
|
"loss": 0.0243, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 3.453900709219858, |
|
"grad_norm": 0.2559250295162201, |
|
"learning_rate": 0.00012113981684427591, |
|
"loss": 0.0302, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 3.4609929078014185, |
|
"grad_norm": 0.4311963617801666, |
|
"learning_rate": 0.00012083399980151, |
|
"loss": 0.0275, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 3.4680851063829787, |
|
"grad_norm": 0.24473054707050323, |
|
"learning_rate": 0.00012052797893318657, |
|
"loss": 0.0251, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 3.475177304964539, |
|
"grad_norm": 0.24619214236736298, |
|
"learning_rate": 0.00012022175723320381, |
|
"loss": 0.0198, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 3.482269503546099, |
|
"grad_norm": 0.3668628931045532, |
|
"learning_rate": 0.00011991533769742469, |
|
"loss": 0.0313, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 3.4893617021276597, |
|
"grad_norm": 0.4206676483154297, |
|
"learning_rate": 0.00011960872332364765, |
|
"loss": 0.0296, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 3.49645390070922, |
|
"grad_norm": 0.28001976013183594, |
|
"learning_rate": 0.00011930191711157737, |
|
"loss": 0.0243, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 3.50354609929078, |
|
"grad_norm": 0.2904788553714752, |
|
"learning_rate": 0.00011899492206279524, |
|
"loss": 0.0215, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 3.5106382978723403, |
|
"grad_norm": 0.3145068883895874, |
|
"learning_rate": 0.0001186877411807302, |
|
"loss": 0.0254, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 3.5177304964539005, |
|
"grad_norm": 0.42452743649482727, |
|
"learning_rate": 0.0001183803774706292, |
|
"loss": 0.0249, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 3.524822695035461, |
|
"grad_norm": 0.4683021903038025, |
|
"learning_rate": 0.00011807283393952786, |
|
"loss": 0.0218, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 3.5319148936170213, |
|
"grad_norm": 0.20090143382549286, |
|
"learning_rate": 0.00011776511359622105, |
|
"loss": 0.0187, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 3.5390070921985815, |
|
"grad_norm": 0.20158180594444275, |
|
"learning_rate": 0.00011745721945123343, |
|
"loss": 0.0263, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 3.546099290780142, |
|
"grad_norm": 0.2160181701183319, |
|
"learning_rate": 0.00011714915451679003, |
|
"loss": 0.0253, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.5531914893617023, |
|
"grad_norm": 0.2958919405937195, |
|
"learning_rate": 0.00011684092180678683, |
|
"loss": 0.0276, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 3.5602836879432624, |
|
"grad_norm": 0.3748587667942047, |
|
"learning_rate": 0.00011653252433676108, |
|
"loss": 0.0244, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 3.5673758865248226, |
|
"grad_norm": 0.1788649708032608, |
|
"learning_rate": 0.00011622396512386202, |
|
"loss": 0.0217, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 3.574468085106383, |
|
"grad_norm": 0.21001924574375153, |
|
"learning_rate": 0.00011591524718682127, |
|
"loss": 0.019, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 3.581560283687943, |
|
"grad_norm": 0.22885674238204956, |
|
"learning_rate": 0.00011560637354592332, |
|
"loss": 0.0185, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 3.5886524822695036, |
|
"grad_norm": 0.29409468173980713, |
|
"learning_rate": 0.0001152973472229758, |
|
"loss": 0.0167, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 3.595744680851064, |
|
"grad_norm": 0.30863863229751587, |
|
"learning_rate": 0.00011498817124128032, |
|
"loss": 0.0254, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 3.602836879432624, |
|
"grad_norm": 0.18340665102005005, |
|
"learning_rate": 0.00011467884862560245, |
|
"loss": 0.0255, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 3.6099290780141846, |
|
"grad_norm": 0.28949764370918274, |
|
"learning_rate": 0.00011436938240214241, |
|
"loss": 0.0303, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 3.617021276595745, |
|
"grad_norm": 0.19286899268627167, |
|
"learning_rate": 0.0001140597755985054, |
|
"loss": 0.0284, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 3.624113475177305, |
|
"grad_norm": 0.4090544879436493, |
|
"learning_rate": 0.00011375003124367192, |
|
"loss": 0.0218, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 3.631205673758865, |
|
"grad_norm": 0.32162442803382874, |
|
"learning_rate": 0.00011344015236796822, |
|
"loss": 0.0253, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 3.6382978723404253, |
|
"grad_norm": 0.2677665054798126, |
|
"learning_rate": 0.00011313014200303647, |
|
"loss": 0.0169, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 3.645390070921986, |
|
"grad_norm": 0.4189298152923584, |
|
"learning_rate": 0.00011282000318180545, |
|
"loss": 0.0205, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 3.652482269503546, |
|
"grad_norm": 0.2897457778453827, |
|
"learning_rate": 0.00011250973893846055, |
|
"loss": 0.0207, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 3.6595744680851063, |
|
"grad_norm": 0.21248039603233337, |
|
"learning_rate": 0.00011219935230841421, |
|
"loss": 0.0221, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 3.6666666666666665, |
|
"grad_norm": 0.23816858232021332, |
|
"learning_rate": 0.00011188884632827619, |
|
"loss": 0.0209, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 3.673758865248227, |
|
"grad_norm": 0.24563542008399963, |
|
"learning_rate": 0.00011157822403582399, |
|
"loss": 0.0233, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 3.6808510638297873, |
|
"grad_norm": 0.23103229701519012, |
|
"learning_rate": 0.0001112674884699729, |
|
"loss": 0.0157, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 3.6879432624113475, |
|
"grad_norm": 0.27050310373306274, |
|
"learning_rate": 0.00011095664267074655, |
|
"loss": 0.0214, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 3.6950354609929077, |
|
"grad_norm": 0.16521809995174408, |
|
"learning_rate": 0.00011064568967924683, |
|
"loss": 0.0224, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 3.702127659574468, |
|
"grad_norm": 0.19685564935207367, |
|
"learning_rate": 0.00011033463253762452, |
|
"loss": 0.0157, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 3.7092198581560285, |
|
"grad_norm": 0.2071635127067566, |
|
"learning_rate": 0.0001100234742890492, |
|
"loss": 0.0196, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 3.7163120567375887, |
|
"grad_norm": 0.19163092970848083, |
|
"learning_rate": 0.00010971221797767966, |
|
"loss": 0.0183, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 3.723404255319149, |
|
"grad_norm": 0.43572723865509033, |
|
"learning_rate": 0.00010940086664863404, |
|
"loss": 0.0189, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 3.7304964539007095, |
|
"grad_norm": 0.19704580307006836, |
|
"learning_rate": 0.00010908942334796015, |
|
"loss": 0.0213, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 3.7375886524822697, |
|
"grad_norm": 0.30172115564346313, |
|
"learning_rate": 0.00010877789112260551, |
|
"loss": 0.0242, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 3.74468085106383, |
|
"grad_norm": 0.2991447150707245, |
|
"learning_rate": 0.00010846627302038756, |
|
"loss": 0.0163, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 3.75177304964539, |
|
"grad_norm": 0.2086688131093979, |
|
"learning_rate": 0.00010815457208996407, |
|
"loss": 0.0162, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 3.7588652482269502, |
|
"grad_norm": 0.25282007455825806, |
|
"learning_rate": 0.000107842791380803, |
|
"loss": 0.0292, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 3.7659574468085104, |
|
"grad_norm": 0.21109668910503387, |
|
"learning_rate": 0.0001075309339431529, |
|
"loss": 0.0283, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 3.773049645390071, |
|
"grad_norm": 0.212016299366951, |
|
"learning_rate": 0.00010721900282801287, |
|
"loss": 0.0248, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 3.780141843971631, |
|
"grad_norm": 0.19006933271884918, |
|
"learning_rate": 0.00010690700108710297, |
|
"loss": 0.0247, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 3.7872340425531914, |
|
"grad_norm": 0.28409719467163086, |
|
"learning_rate": 0.00010659493177283408, |
|
"loss": 0.0236, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 3.794326241134752, |
|
"grad_norm": 0.3740707337856293, |
|
"learning_rate": 0.00010628279793827825, |
|
"loss": 0.0244, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 3.801418439716312, |
|
"grad_norm": 0.20624110102653503, |
|
"learning_rate": 0.00010597060263713872, |
|
"loss": 0.0213, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 3.8085106382978724, |
|
"grad_norm": 0.2841550409793854, |
|
"learning_rate": 0.0001056583489237201, |
|
"loss": 0.0156, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 3.8156028368794326, |
|
"grad_norm": 0.3967061936855316, |
|
"learning_rate": 0.00010534603985289844, |
|
"loss": 0.0233, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 3.8226950354609928, |
|
"grad_norm": 0.33291056752204895, |
|
"learning_rate": 0.00010503367848009133, |
|
"loss": 0.0214, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 3.829787234042553, |
|
"grad_norm": 0.3669842779636383, |
|
"learning_rate": 0.00010472126786122818, |
|
"loss": 0.0255, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 3.8368794326241136, |
|
"grad_norm": 0.2933257520198822, |
|
"learning_rate": 0.00010440881105272007, |
|
"loss": 0.021, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 3.8439716312056738, |
|
"grad_norm": 0.1543368250131607, |
|
"learning_rate": 0.00010409631111142997, |
|
"loss": 0.0206, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 3.851063829787234, |
|
"grad_norm": 0.2014048546552658, |
|
"learning_rate": 0.0001037837710946429, |
|
"loss": 0.0198, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 3.8581560283687946, |
|
"grad_norm": 0.2781049907207489, |
|
"learning_rate": 0.00010347119406003592, |
|
"loss": 0.0261, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 3.8652482269503547, |
|
"grad_norm": 0.26720258593559265, |
|
"learning_rate": 0.0001031585830656482, |
|
"loss": 0.0216, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 3.872340425531915, |
|
"grad_norm": 0.23284177482128143, |
|
"learning_rate": 0.00010284594116985125, |
|
"loss": 0.0195, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 3.879432624113475, |
|
"grad_norm": 0.36926499009132385, |
|
"learning_rate": 0.00010253327143131879, |
|
"loss": 0.0226, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 3.8865248226950353, |
|
"grad_norm": 0.3114331066608429, |
|
"learning_rate": 0.00010222057690899705, |
|
"loss": 0.0242, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 3.8936170212765955, |
|
"grad_norm": 0.2598218023777008, |
|
"learning_rate": 0.00010190786066207458, |
|
"loss": 0.0174, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 3.900709219858156, |
|
"grad_norm": 0.21172010898590088, |
|
"learning_rate": 0.00010159512574995258, |
|
"loss": 0.0164, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.9078014184397163, |
|
"grad_norm": 0.23221197724342346, |
|
"learning_rate": 0.00010128237523221487, |
|
"loss": 0.0174, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 3.9148936170212765, |
|
"grad_norm": 0.21310311555862427, |
|
"learning_rate": 0.00010096961216859787, |
|
"loss": 0.0186, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 3.921985815602837, |
|
"grad_norm": 0.22581326961517334, |
|
"learning_rate": 0.00010065683961896074, |
|
"loss": 0.0201, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 3.9290780141843973, |
|
"grad_norm": 0.36404818296432495, |
|
"learning_rate": 0.00010034406064325553, |
|
"loss": 0.0188, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 3.9361702127659575, |
|
"grad_norm": 0.2163950651884079, |
|
"learning_rate": 0.00010003127830149706, |
|
"loss": 0.0159, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 3.9432624113475176, |
|
"grad_norm": 0.5740591883659363, |
|
"learning_rate": 9.971849565373317e-05, |
|
"loss": 0.0293, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 3.950354609929078, |
|
"grad_norm": 0.31515783071517944, |
|
"learning_rate": 9.940571576001465e-05, |
|
"loss": 0.0214, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 3.9574468085106385, |
|
"grad_norm": 0.3176766335964203, |
|
"learning_rate": 9.909294168036531e-05, |
|
"loss": 0.018, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 3.9645390070921986, |
|
"grad_norm": 0.2528095245361328, |
|
"learning_rate": 9.87801764747521e-05, |
|
"loss": 0.0221, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 3.971631205673759, |
|
"grad_norm": 0.30647730827331543, |
|
"learning_rate": 9.846742320305527e-05, |
|
"loss": 0.0261, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 3.978723404255319, |
|
"grad_norm": 0.2308386266231537, |
|
"learning_rate": 9.815468492503812e-05, |
|
"loss": 0.017, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 3.9858156028368796, |
|
"grad_norm": 0.21092812716960907, |
|
"learning_rate": 9.78419647003174e-05, |
|
"loss": 0.023, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 3.99290780141844, |
|
"grad_norm": 0.3230001628398895, |
|
"learning_rate": 9.752926558833317e-05, |
|
"loss": 0.0174, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.3302082419395447, |
|
"learning_rate": 9.721659064831895e-05, |
|
"loss": 0.0206, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 4.00709219858156, |
|
"grad_norm": 0.2825748026371002, |
|
"learning_rate": 9.690394293927189e-05, |
|
"loss": 0.0203, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 4.01418439716312, |
|
"grad_norm": 0.2544749081134796, |
|
"learning_rate": 9.659132551992248e-05, |
|
"loss": 0.0191, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 4.0212765957446805, |
|
"grad_norm": 0.19003655016422272, |
|
"learning_rate": 9.627874144870514e-05, |
|
"loss": 0.0167, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 4.028368794326241, |
|
"grad_norm": 0.18228965997695923, |
|
"learning_rate": 9.596619378372794e-05, |
|
"loss": 0.0286, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 4.035460992907802, |
|
"grad_norm": 0.27924972772598267, |
|
"learning_rate": 9.565368558274266e-05, |
|
"loss": 0.0126, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 4.042553191489362, |
|
"grad_norm": 0.1970324069261551, |
|
"learning_rate": 9.534121990311515e-05, |
|
"loss": 0.0192, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 4.049645390070922, |
|
"grad_norm": 0.37134411931037903, |
|
"learning_rate": 9.502879980179525e-05, |
|
"loss": 0.0233, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 4.056737588652482, |
|
"grad_norm": 0.20801763236522675, |
|
"learning_rate": 9.471642833528673e-05, |
|
"loss": 0.0176, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 4.0638297872340425, |
|
"grad_norm": 0.19383108615875244, |
|
"learning_rate": 9.440410855961776e-05, |
|
"loss": 0.0228, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 4.070921985815603, |
|
"grad_norm": 0.21508440375328064, |
|
"learning_rate": 9.409184353031068e-05, |
|
"loss": 0.0163, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 4.078014184397163, |
|
"grad_norm": 0.2944611608982086, |
|
"learning_rate": 9.377963630235225e-05, |
|
"loss": 0.0219, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 4.085106382978723, |
|
"grad_norm": 0.23027314245700836, |
|
"learning_rate": 9.346748993016377e-05, |
|
"loss": 0.0177, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 4.092198581560283, |
|
"grad_norm": 0.17456182837486267, |
|
"learning_rate": 9.315540746757108e-05, |
|
"loss": 0.0157, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 4.099290780141844, |
|
"grad_norm": 0.34717416763305664, |
|
"learning_rate": 9.284339196777491e-05, |
|
"loss": 0.0244, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 4.1063829787234045, |
|
"grad_norm": 0.38114133477211, |
|
"learning_rate": 9.25314464833208e-05, |
|
"loss": 0.0189, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 4.113475177304965, |
|
"grad_norm": 0.3224876821041107, |
|
"learning_rate": 9.221957406606926e-05, |
|
"loss": 0.0196, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 4.120567375886525, |
|
"grad_norm": 0.38748404383659363, |
|
"learning_rate": 9.190777776716606e-05, |
|
"loss": 0.0271, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 4.127659574468085, |
|
"grad_norm": 0.3015083074569702, |
|
"learning_rate": 9.159606063701221e-05, |
|
"loss": 0.0263, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 4.134751773049645, |
|
"grad_norm": 0.23368023335933685, |
|
"learning_rate": 9.128442572523417e-05, |
|
"loss": 0.0199, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 4.141843971631205, |
|
"grad_norm": 0.209278866648674, |
|
"learning_rate": 9.097287608065414e-05, |
|
"loss": 0.0157, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 4.148936170212766, |
|
"grad_norm": 0.21174615621566772, |
|
"learning_rate": 9.066141475126003e-05, |
|
"loss": 0.0161, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 4.156028368794326, |
|
"grad_norm": 0.22363576292991638, |
|
"learning_rate": 9.035004478417573e-05, |
|
"loss": 0.0202, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 4.163120567375887, |
|
"grad_norm": 0.22810956835746765, |
|
"learning_rate": 9.003876922563137e-05, |
|
"loss": 0.0179, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 4.170212765957447, |
|
"grad_norm": 0.1800043135881424, |
|
"learning_rate": 8.972759112093336e-05, |
|
"loss": 0.0235, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 4.177304964539007, |
|
"grad_norm": 0.2977979779243469, |
|
"learning_rate": 8.941651351443476e-05, |
|
"loss": 0.0167, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 4.184397163120567, |
|
"grad_norm": 0.1779855191707611, |
|
"learning_rate": 8.910553944950549e-05, |
|
"loss": 0.0156, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 4.191489361702128, |
|
"grad_norm": 0.20011846721172333, |
|
"learning_rate": 8.879467196850229e-05, |
|
"loss": 0.0252, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 4.198581560283688, |
|
"grad_norm": 0.12876836955547333, |
|
"learning_rate": 8.848391411273933e-05, |
|
"loss": 0.0191, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 4.205673758865248, |
|
"grad_norm": 0.24333477020263672, |
|
"learning_rate": 8.817326892245825e-05, |
|
"loss": 0.0196, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 4.212765957446808, |
|
"grad_norm": 0.1953645497560501, |
|
"learning_rate": 8.786273943679835e-05, |
|
"loss": 0.0225, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 4.219858156028369, |
|
"grad_norm": 0.25837603211402893, |
|
"learning_rate": 8.755232869376706e-05, |
|
"loss": 0.0155, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 4.226950354609929, |
|
"grad_norm": 0.24299311637878418, |
|
"learning_rate": 8.724203973021015e-05, |
|
"loss": 0.0168, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 4.23404255319149, |
|
"grad_norm": 0.27927613258361816, |
|
"learning_rate": 8.693187558178181e-05, |
|
"loss": 0.0174, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 4.24113475177305, |
|
"grad_norm": 0.23098881542682648, |
|
"learning_rate": 8.662183928291532e-05, |
|
"loss": 0.0119, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 4.24822695035461, |
|
"grad_norm": 0.187413290143013, |
|
"learning_rate": 8.631193386679301e-05, |
|
"loss": 0.017, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 4.25531914893617, |
|
"grad_norm": 0.36277079582214355, |
|
"learning_rate": 8.600216236531682e-05, |
|
"loss": 0.0249, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.26241134751773, |
|
"grad_norm": 0.2662680745124817, |
|
"learning_rate": 8.569252780907862e-05, |
|
"loss": 0.0165, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 4.2695035460992905, |
|
"grad_norm": 0.12048438936471939, |
|
"learning_rate": 8.538303322733032e-05, |
|
"loss": 0.0155, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 4.276595744680851, |
|
"grad_norm": 0.2885691523551941, |
|
"learning_rate": 8.507368164795462e-05, |
|
"loss": 0.0259, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 4.283687943262412, |
|
"grad_norm": 0.2707173526287079, |
|
"learning_rate": 8.476447609743508e-05, |
|
"loss": 0.0206, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 4.290780141843972, |
|
"grad_norm": 0.32506418228149414, |
|
"learning_rate": 8.44554196008266e-05, |
|
"loss": 0.0167, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 4.297872340425532, |
|
"grad_norm": 0.2805072069168091, |
|
"learning_rate": 8.414651518172583e-05, |
|
"loss": 0.0192, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 4.304964539007092, |
|
"grad_norm": 0.34258946776390076, |
|
"learning_rate": 8.383776586224175e-05, |
|
"loss": 0.0187, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 4.3120567375886525, |
|
"grad_norm": 0.21396967768669128, |
|
"learning_rate": 8.35291746629657e-05, |
|
"loss": 0.0139, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 4.319148936170213, |
|
"grad_norm": 0.2685137987136841, |
|
"learning_rate": 8.322074460294231e-05, |
|
"loss": 0.0153, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 4.326241134751773, |
|
"grad_norm": 0.22136232256889343, |
|
"learning_rate": 8.291247869963959e-05, |
|
"loss": 0.0256, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 4.333333333333333, |
|
"grad_norm": 0.2527240514755249, |
|
"learning_rate": 8.26043799689196e-05, |
|
"loss": 0.0219, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 4.340425531914893, |
|
"grad_norm": 0.12039966136217117, |
|
"learning_rate": 8.229645142500897e-05, |
|
"loss": 0.0169, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 4.347517730496454, |
|
"grad_norm": 0.2619229853153229, |
|
"learning_rate": 8.198869608046915e-05, |
|
"loss": 0.0245, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 4.3546099290780145, |
|
"grad_norm": 0.3446950316429138, |
|
"learning_rate": 8.168111694616733e-05, |
|
"loss": 0.0196, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 4.361702127659575, |
|
"grad_norm": 0.18836474418640137, |
|
"learning_rate": 8.137371703124671e-05, |
|
"loss": 0.0154, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 4.368794326241135, |
|
"grad_norm": 0.2591850757598877, |
|
"learning_rate": 8.106649934309706e-05, |
|
"loss": 0.0214, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 4.375886524822695, |
|
"grad_norm": 0.2989495098590851, |
|
"learning_rate": 8.075946688732545e-05, |
|
"loss": 0.0169, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 4.382978723404255, |
|
"grad_norm": 0.33893871307373047, |
|
"learning_rate": 8.045262266772675e-05, |
|
"loss": 0.0234, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 4.390070921985815, |
|
"grad_norm": 0.3758526146411896, |
|
"learning_rate": 8.01459696862542e-05, |
|
"loss": 0.0184, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 4.397163120567376, |
|
"grad_norm": 0.26383623480796814, |
|
"learning_rate": 7.983951094299022e-05, |
|
"loss": 0.0221, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 4.404255319148936, |
|
"grad_norm": 0.44497719407081604, |
|
"learning_rate": 7.953324943611677e-05, |
|
"loss": 0.024, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 4.411347517730497, |
|
"grad_norm": 0.3149394392967224, |
|
"learning_rate": 7.92271881618863e-05, |
|
"loss": 0.0266, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 4.418439716312057, |
|
"grad_norm": 0.19326764345169067, |
|
"learning_rate": 7.892133011459237e-05, |
|
"loss": 0.0179, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 4.425531914893617, |
|
"grad_norm": 0.2152886539697647, |
|
"learning_rate": 7.861567828654013e-05, |
|
"loss": 0.0213, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 4.432624113475177, |
|
"grad_norm": 0.22995711863040924, |
|
"learning_rate": 7.831023566801734e-05, |
|
"loss": 0.0152, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 4.439716312056738, |
|
"grad_norm": 0.28582632541656494, |
|
"learning_rate": 7.800500524726505e-05, |
|
"loss": 0.0237, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 4.446808510638298, |
|
"grad_norm": 0.2682250142097473, |
|
"learning_rate": 7.769999001044818e-05, |
|
"loss": 0.0198, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 4.453900709219858, |
|
"grad_norm": 0.3235504627227783, |
|
"learning_rate": 7.739519294162652e-05, |
|
"loss": 0.0186, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 4.460992907801418, |
|
"grad_norm": 0.3280167281627655, |
|
"learning_rate": 7.709061702272546e-05, |
|
"loss": 0.0168, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 4.468085106382979, |
|
"grad_norm": 0.2376112937927246, |
|
"learning_rate": 7.678626523350674e-05, |
|
"loss": 0.0208, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 4.475177304964539, |
|
"grad_norm": 0.38887420296669006, |
|
"learning_rate": 7.648214055153946e-05, |
|
"loss": 0.0146, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 4.4822695035460995, |
|
"grad_norm": 0.24783270061016083, |
|
"learning_rate": 7.617824595217074e-05, |
|
"loss": 0.0172, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 4.48936170212766, |
|
"grad_norm": 0.22741125524044037, |
|
"learning_rate": 7.587458440849691e-05, |
|
"loss": 0.0202, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 4.49645390070922, |
|
"grad_norm": 0.3385200798511505, |
|
"learning_rate": 7.557115889133408e-05, |
|
"loss": 0.0232, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 4.50354609929078, |
|
"grad_norm": 0.2820025384426117, |
|
"learning_rate": 7.526797236918929e-05, |
|
"loss": 0.0148, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 4.51063829787234, |
|
"grad_norm": 0.27540770173072815, |
|
"learning_rate": 7.496502780823141e-05, |
|
"loss": 0.0173, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 4.5177304964539005, |
|
"grad_norm": 0.23689982295036316, |
|
"learning_rate": 7.466232817226224e-05, |
|
"loss": 0.0192, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 4.524822695035461, |
|
"grad_norm": 0.31511813402175903, |
|
"learning_rate": 7.435987642268715e-05, |
|
"loss": 0.019, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 4.531914893617021, |
|
"grad_norm": 0.2491617202758789, |
|
"learning_rate": 7.405767551848662e-05, |
|
"loss": 0.0233, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 4.539007092198582, |
|
"grad_norm": 0.3146982192993164, |
|
"learning_rate": 7.37557284161869e-05, |
|
"loss": 0.02, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 4.546099290780142, |
|
"grad_norm": 0.1645408272743225, |
|
"learning_rate": 7.345403806983121e-05, |
|
"loss": 0.0195, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 4.553191489361702, |
|
"grad_norm": 0.23353220522403717, |
|
"learning_rate": 7.31526074309509e-05, |
|
"loss": 0.0197, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 4.560283687943262, |
|
"grad_norm": 0.2860185205936432, |
|
"learning_rate": 7.285143944853652e-05, |
|
"loss": 0.0199, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 4.567375886524823, |
|
"grad_norm": 0.23552881181240082, |
|
"learning_rate": 7.255053706900887e-05, |
|
"loss": 0.0145, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 4.574468085106383, |
|
"grad_norm": 0.41338714957237244, |
|
"learning_rate": 7.224990323619044e-05, |
|
"loss": 0.0194, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 4.581560283687943, |
|
"grad_norm": 0.23148952424526215, |
|
"learning_rate": 7.194954089127628e-05, |
|
"loss": 0.0166, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 4.588652482269503, |
|
"grad_norm": 0.26471659541130066, |
|
"learning_rate": 7.16494529728055e-05, |
|
"loss": 0.0147, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 4.595744680851064, |
|
"grad_norm": 0.22270764410495758, |
|
"learning_rate": 7.134964241663237e-05, |
|
"loss": 0.0132, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 4.602836879432624, |
|
"grad_norm": 0.1745089590549469, |
|
"learning_rate": 7.105011215589759e-05, |
|
"loss": 0.0122, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 4.609929078014185, |
|
"grad_norm": 0.12301220744848251, |
|
"learning_rate": 7.075086512099973e-05, |
|
"loss": 0.0143, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 4.617021276595745, |
|
"grad_norm": 0.29022157192230225, |
|
"learning_rate": 7.045190423956646e-05, |
|
"loss": 0.0279, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 4.624113475177305, |
|
"grad_norm": 0.2177857905626297, |
|
"learning_rate": 7.015323243642584e-05, |
|
"loss": 0.0216, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 4.631205673758865, |
|
"grad_norm": 0.31943878531455994, |
|
"learning_rate": 6.985485263357785e-05, |
|
"loss": 0.016, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 4.638297872340425, |
|
"grad_norm": 0.2381797432899475, |
|
"learning_rate": 6.955676775016579e-05, |
|
"loss": 0.0205, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 4.6453900709219855, |
|
"grad_norm": 0.26750218868255615, |
|
"learning_rate": 6.925898070244752e-05, |
|
"loss": 0.0231, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 4.652482269503546, |
|
"grad_norm": 0.2351475954055786, |
|
"learning_rate": 6.896149440376725e-05, |
|
"loss": 0.0128, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 4.659574468085106, |
|
"grad_norm": 0.2950522005558014, |
|
"learning_rate": 6.86643117645267e-05, |
|
"loss": 0.0223, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 4.666666666666667, |
|
"grad_norm": 0.24356043338775635, |
|
"learning_rate": 6.836743569215696e-05, |
|
"loss": 0.0151, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 4.673758865248227, |
|
"grad_norm": 0.20342320203781128, |
|
"learning_rate": 6.807086909108978e-05, |
|
"loss": 0.015, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 4.680851063829787, |
|
"grad_norm": 0.22289365530014038, |
|
"learning_rate": 6.777461486272925e-05, |
|
"loss": 0.0134, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 4.6879432624113475, |
|
"grad_norm": 0.17356853187084198, |
|
"learning_rate": 6.747867590542345e-05, |
|
"loss": 0.0111, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 4.695035460992908, |
|
"grad_norm": 0.1519446223974228, |
|
"learning_rate": 6.718305511443612e-05, |
|
"loss": 0.0172, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 4.702127659574468, |
|
"grad_norm": 0.22093936800956726, |
|
"learning_rate": 6.688775538191816e-05, |
|
"loss": 0.0144, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 4.709219858156028, |
|
"grad_norm": 0.29076847434043884, |
|
"learning_rate": 6.659277959687954e-05, |
|
"loss": 0.0147, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 4.716312056737589, |
|
"grad_norm": 0.4279550611972809, |
|
"learning_rate": 6.629813064516094e-05, |
|
"loss": 0.015, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 4.723404255319149, |
|
"grad_norm": 0.2095872014760971, |
|
"learning_rate": 6.600381140940544e-05, |
|
"loss": 0.017, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 4.7304964539007095, |
|
"grad_norm": 0.19063495099544525, |
|
"learning_rate": 6.570982476903061e-05, |
|
"loss": 0.0218, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 4.73758865248227, |
|
"grad_norm": 0.3333429992198944, |
|
"learning_rate": 6.541617360019985e-05, |
|
"loss": 0.0175, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 4.74468085106383, |
|
"grad_norm": 0.20596669614315033, |
|
"learning_rate": 6.512286077579478e-05, |
|
"loss": 0.0143, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 4.75177304964539, |
|
"grad_norm": 0.36356234550476074, |
|
"learning_rate": 6.48298891653868e-05, |
|
"loss": 0.0218, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 4.75886524822695, |
|
"grad_norm": 0.18473972380161285, |
|
"learning_rate": 6.453726163520906e-05, |
|
"loss": 0.0133, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 4.76595744680851, |
|
"grad_norm": 0.7571324110031128, |
|
"learning_rate": 6.424498104812852e-05, |
|
"loss": 0.016, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 4.773049645390071, |
|
"grad_norm": 0.24869713187217712, |
|
"learning_rate": 6.395305026361795e-05, |
|
"loss": 0.0212, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 4.780141843971631, |
|
"grad_norm": 0.1801852136850357, |
|
"learning_rate": 6.366147213772772e-05, |
|
"loss": 0.022, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 4.787234042553192, |
|
"grad_norm": 0.31297555565834045, |
|
"learning_rate": 6.337024952305819e-05, |
|
"loss": 0.0178, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 4.794326241134752, |
|
"grad_norm": 0.1869523674249649, |
|
"learning_rate": 6.307938526873157e-05, |
|
"loss": 0.02, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 4.801418439716312, |
|
"grad_norm": 0.10907835513353348, |
|
"learning_rate": 6.278888222036411e-05, |
|
"loss": 0.0205, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 4.808510638297872, |
|
"grad_norm": 0.23566560447216034, |
|
"learning_rate": 6.249874322003833e-05, |
|
"loss": 0.0164, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 4.815602836879433, |
|
"grad_norm": 0.2034124732017517, |
|
"learning_rate": 6.220897110627504e-05, |
|
"loss": 0.014, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 4.822695035460993, |
|
"grad_norm": 0.2759285271167755, |
|
"learning_rate": 6.191956871400582e-05, |
|
"loss": 0.0257, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 4.829787234042553, |
|
"grad_norm": 0.36838316917419434, |
|
"learning_rate": 6.163053887454509e-05, |
|
"loss": 0.0189, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 4.836879432624113, |
|
"grad_norm": 0.21057933568954468, |
|
"learning_rate": 6.134188441556241e-05, |
|
"loss": 0.0168, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 4.843971631205674, |
|
"grad_norm": 0.22402699291706085, |
|
"learning_rate": 6.105360816105498e-05, |
|
"loss": 0.0191, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 4.851063829787234, |
|
"grad_norm": 0.20568257570266724, |
|
"learning_rate": 6.0765712931319826e-05, |
|
"loss": 0.02, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 4.858156028368795, |
|
"grad_norm": 0.1438162624835968, |
|
"learning_rate": 6.0478201542926316e-05, |
|
"loss": 0.0135, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 4.865248226950355, |
|
"grad_norm": 0.18207921087741852, |
|
"learning_rate": 6.019107680868859e-05, |
|
"loss": 0.0154, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 4.872340425531915, |
|
"grad_norm": 0.3172309398651123, |
|
"learning_rate": 5.990434153763804e-05, |
|
"loss": 0.0143, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 4.879432624113475, |
|
"grad_norm": 0.2266601026058197, |
|
"learning_rate": 5.9617998534995766e-05, |
|
"loss": 0.0212, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 4.886524822695035, |
|
"grad_norm": 0.20986783504486084, |
|
"learning_rate": 5.933205060214525e-05, |
|
"loss": 0.016, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 4.8936170212765955, |
|
"grad_norm": 0.19340595602989197, |
|
"learning_rate": 5.9046500536604796e-05, |
|
"loss": 0.0161, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 4.900709219858156, |
|
"grad_norm": 0.2367888242006302, |
|
"learning_rate": 5.8761351132000295e-05, |
|
"loss": 0.0196, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 4.907801418439716, |
|
"grad_norm": 0.30044275522232056, |
|
"learning_rate": 5.8476605178037925e-05, |
|
"loss": 0.0176, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 4.914893617021277, |
|
"grad_norm": 0.3268476128578186, |
|
"learning_rate": 5.819226546047667e-05, |
|
"loss": 0.018, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 4.921985815602837, |
|
"grad_norm": 0.30031996965408325, |
|
"learning_rate": 5.790833476110113e-05, |
|
"loss": 0.0155, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 4.929078014184397, |
|
"grad_norm": 0.2088995724916458, |
|
"learning_rate": 5.762481585769455e-05, |
|
"loss": 0.013, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 4.9361702127659575, |
|
"grad_norm": 0.23445673286914825, |
|
"learning_rate": 5.7341711524011224e-05, |
|
"loss": 0.019, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 4.943262411347518, |
|
"grad_norm": 0.1619795709848404, |
|
"learning_rate": 5.705902452974978e-05, |
|
"loss": 0.0147, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 4.950354609929078, |
|
"grad_norm": 0.16455408930778503, |
|
"learning_rate": 5.6776757640525736e-05, |
|
"loss": 0.015, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 4.957446808510638, |
|
"grad_norm": 0.1724604368209839, |
|
"learning_rate": 5.6494913617844604e-05, |
|
"loss": 0.0255, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 4.964539007092198, |
|
"grad_norm": 0.2437286525964737, |
|
"learning_rate": 5.6213495219074975e-05, |
|
"loss": 0.0194, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 4.971631205673759, |
|
"grad_norm": 0.20035170018672943, |
|
"learning_rate": 5.593250519742127e-05, |
|
"loss": 0.0197, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 4.9787234042553195, |
|
"grad_norm": 0.2261771708726883, |
|
"learning_rate": 5.5651946301897126e-05, |
|
"loss": 0.0173, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 4.98581560283688, |
|
"grad_norm": 0.24441353976726532, |
|
"learning_rate": 5.537182127729822e-05, |
|
"loss": 0.0154, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 4.99290780141844, |
|
"grad_norm": 0.2184004932641983, |
|
"learning_rate": 5.509213286417551e-05, |
|
"loss": 0.0145, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.3220185339450836, |
|
"learning_rate": 5.481288379880857e-05, |
|
"loss": 0.0204, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 5.00709219858156, |
|
"grad_norm": 0.37926825881004333, |
|
"learning_rate": 5.453407681317868e-05, |
|
"loss": 0.0158, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 5.01418439716312, |
|
"grad_norm": 0.37592944502830505, |
|
"learning_rate": 5.4255714634941936e-05, |
|
"loss": 0.0203, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 5.0212765957446805, |
|
"grad_norm": 0.32261180877685547, |
|
"learning_rate": 5.397779998740293e-05, |
|
"loss": 0.0187, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 5.028368794326241, |
|
"grad_norm": 0.24909910559654236, |
|
"learning_rate": 5.3700335589487925e-05, |
|
"loss": 0.0182, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 5.035460992907802, |
|
"grad_norm": 0.2592754065990448, |
|
"learning_rate": 5.3423324155718144e-05, |
|
"loss": 0.0145, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 5.042553191489362, |
|
"grad_norm": 0.30807894468307495, |
|
"learning_rate": 5.314676839618332e-05, |
|
"loss": 0.0134, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 5.049645390070922, |
|
"grad_norm": 0.1335049420595169, |
|
"learning_rate": 5.287067101651533e-05, |
|
"loss": 0.015, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 5.056737588652482, |
|
"grad_norm": 0.3497454822063446, |
|
"learning_rate": 5.259503471786136e-05, |
|
"loss": 0.0204, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 5.0638297872340425, |
|
"grad_norm": 0.19706270098686218, |
|
"learning_rate": 5.2319862196857914e-05, |
|
"loss": 0.017, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 5.070921985815603, |
|
"grad_norm": 0.31677910685539246, |
|
"learning_rate": 5.204515614560407e-05, |
|
"loss": 0.015, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 5.078014184397163, |
|
"grad_norm": 0.24632439017295837, |
|
"learning_rate": 5.177091925163529e-05, |
|
"loss": 0.0257, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 5.085106382978723, |
|
"grad_norm": 0.26360684633255005, |
|
"learning_rate": 5.149715419789723e-05, |
|
"loss": 0.0119, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 5.092198581560283, |
|
"grad_norm": 0.23850224912166595, |
|
"learning_rate": 5.122386366271923e-05, |
|
"loss": 0.0196, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 5.099290780141844, |
|
"grad_norm": 0.3354696035385132, |
|
"learning_rate": 5.0951050319788444e-05, |
|
"loss": 0.0138, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 5.1063829787234045, |
|
"grad_norm": 0.18239453434944153, |
|
"learning_rate": 5.067871683812338e-05, |
|
"loss": 0.0206, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 5.113475177304965, |
|
"grad_norm": 0.18862655758857727, |
|
"learning_rate": 5.0406865882047884e-05, |
|
"loss": 0.011, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 5.120567375886525, |
|
"grad_norm": 0.1917518526315689, |
|
"learning_rate": 5.0135500111165215e-05, |
|
"loss": 0.0139, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 5.127659574468085, |
|
"grad_norm": 0.19659163057804108, |
|
"learning_rate": 4.986462218033192e-05, |
|
"loss": 0.0161, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 5.134751773049645, |
|
"grad_norm": 0.23764602839946747, |
|
"learning_rate": 4.959423473963167e-05, |
|
"loss": 0.017, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 5.141843971631205, |
|
"grad_norm": 0.18340301513671875, |
|
"learning_rate": 4.932434043434975e-05, |
|
"loss": 0.0163, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 5.148936170212766, |
|
"grad_norm": 0.2168426811695099, |
|
"learning_rate": 4.905494190494674e-05, |
|
"loss": 0.0141, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 5.156028368794326, |
|
"grad_norm": 0.2772137522697449, |
|
"learning_rate": 4.878604178703308e-05, |
|
"loss": 0.0186, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 5.163120567375887, |
|
"grad_norm": 0.17947837710380554, |
|
"learning_rate": 4.851764271134296e-05, |
|
"loss": 0.0141, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 5.170212765957447, |
|
"grad_norm": 0.2101341038942337, |
|
"learning_rate": 4.824974730370871e-05, |
|
"loss": 0.0129, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 5.177304964539007, |
|
"grad_norm": 0.28040558099746704, |
|
"learning_rate": 4.798235818503522e-05, |
|
"loss": 0.0218, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 5.184397163120567, |
|
"grad_norm": 0.15831856429576874, |
|
"learning_rate": 4.771547797127418e-05, |
|
"loss": 0.0114, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 5.191489361702128, |
|
"grad_norm": 0.08963089436292648, |
|
"learning_rate": 4.744910927339842e-05, |
|
"loss": 0.0113, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 5.198581560283688, |
|
"grad_norm": 0.24577274918556213, |
|
"learning_rate": 4.7183254697376456e-05, |
|
"loss": 0.0145, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 5.205673758865248, |
|
"grad_norm": 0.31053343415260315, |
|
"learning_rate": 4.69179168441471e-05, |
|
"loss": 0.0133, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 5.212765957446808, |
|
"grad_norm": 0.1162974014878273, |
|
"learning_rate": 4.665309830959377e-05, |
|
"loss": 0.0167, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 5.219858156028369, |
|
"grad_norm": 0.30117878317832947, |
|
"learning_rate": 4.638880168451938e-05, |
|
"loss": 0.022, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 5.226950354609929, |
|
"grad_norm": 0.316582590341568, |
|
"learning_rate": 4.61250295546206e-05, |
|
"loss": 0.0186, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 5.23404255319149, |
|
"grad_norm": 0.3584196865558624, |
|
"learning_rate": 4.586178450046303e-05, |
|
"loss": 0.0182, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 5.24113475177305, |
|
"grad_norm": 0.2816001772880554, |
|
"learning_rate": 4.559906909745567e-05, |
|
"loss": 0.0175, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 5.24822695035461, |
|
"grad_norm": 0.29749003052711487, |
|
"learning_rate": 4.533688591582571e-05, |
|
"loss": 0.0132, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 5.25531914893617, |
|
"grad_norm": 0.15811972320079803, |
|
"learning_rate": 4.5075237520593435e-05, |
|
"loss": 0.0151, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 5.26241134751773, |
|
"grad_norm": 0.22752103209495544, |
|
"learning_rate": 4.4814126471547293e-05, |
|
"loss": 0.0272, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 5.2695035460992905, |
|
"grad_norm": 0.2540184259414673, |
|
"learning_rate": 4.455355532321852e-05, |
|
"loss": 0.0201, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 5.276595744680851, |
|
"grad_norm": 0.24097682535648346, |
|
"learning_rate": 4.429352662485652e-05, |
|
"loss": 0.0139, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 5.283687943262412, |
|
"grad_norm": 0.17041516304016113, |
|
"learning_rate": 4.403404292040357e-05, |
|
"loss": 0.014, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 5.290780141843972, |
|
"grad_norm": 0.2444879710674286, |
|
"learning_rate": 4.377510674847017e-05, |
|
"loss": 0.0128, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 5.297872340425532, |
|
"grad_norm": 0.17711535096168518, |
|
"learning_rate": 4.3516720642310204e-05, |
|
"loss": 0.0163, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 5.304964539007092, |
|
"grad_norm": 0.14002487063407898, |
|
"learning_rate": 4.3258887129795945e-05, |
|
"loss": 0.0164, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 5.3120567375886525, |
|
"grad_norm": 0.10432898253202438, |
|
"learning_rate": 4.300160873339364e-05, |
|
"loss": 0.0172, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 5.319148936170213, |
|
"grad_norm": 0.16279327869415283, |
|
"learning_rate": 4.2744887970138516e-05, |
|
"loss": 0.0226, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 5.326241134751773, |
|
"grad_norm": 0.13522747159004211, |
|
"learning_rate": 4.2488727351610335e-05, |
|
"loss": 0.0121, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 5.333333333333333, |
|
"grad_norm": 0.23896059393882751, |
|
"learning_rate": 4.2233129383908874e-05, |
|
"loss": 0.0193, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 5.340425531914893, |
|
"grad_norm": 0.20990808308124542, |
|
"learning_rate": 4.197809656762922e-05, |
|
"loss": 0.022, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 5.347517730496454, |
|
"grad_norm": 0.25492557883262634, |
|
"learning_rate": 4.1723631397837416e-05, |
|
"loss": 0.0138, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 5.3546099290780145, |
|
"grad_norm": 0.30456793308258057, |
|
"learning_rate": 4.1469736364046086e-05, |
|
"loss": 0.0174, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 5.361702127659575, |
|
"grad_norm": 0.18763889372348785, |
|
"learning_rate": 4.121641395019006e-05, |
|
"loss": 0.0136, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 5.368794326241135, |
|
"grad_norm": 0.17302776873111725, |
|
"learning_rate": 4.096366663460195e-05, |
|
"loss": 0.012, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 5.375886524822695, |
|
"grad_norm": 0.20956365764141083, |
|
"learning_rate": 4.0711496889988076e-05, |
|
"loss": 0.0147, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 5.382978723404255, |
|
"grad_norm": 0.24955029785633087, |
|
"learning_rate": 4.0459907183404135e-05, |
|
"loss": 0.0195, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 5.390070921985815, |
|
"grad_norm": 0.16770239174365997, |
|
"learning_rate": 4.02088999762312e-05, |
|
"loss": 0.0152, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 5.397163120567376, |
|
"grad_norm": 0.17319859564304352, |
|
"learning_rate": 3.995847772415159e-05, |
|
"loss": 0.0127, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 5.404255319148936, |
|
"grad_norm": 0.13932038843631744, |
|
"learning_rate": 3.9708642877124724e-05, |
|
"loss": 0.0121, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 5.411347517730497, |
|
"grad_norm": 0.2718459665775299, |
|
"learning_rate": 3.945939787936329e-05, |
|
"loss": 0.0244, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 5.418439716312057, |
|
"grad_norm": 0.15254083275794983, |
|
"learning_rate": 3.9210745169309374e-05, |
|
"loss": 0.0147, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 5.425531914893617, |
|
"grad_norm": 0.3398546576499939, |
|
"learning_rate": 3.896268717961041e-05, |
|
"loss": 0.0175, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 5.432624113475177, |
|
"grad_norm": 0.1195686087012291, |
|
"learning_rate": 3.871522633709555e-05, |
|
"loss": 0.018, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 5.439716312056738, |
|
"grad_norm": 0.22691002488136292, |
|
"learning_rate": 3.84683650627519e-05, |
|
"loss": 0.0126, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 5.446808510638298, |
|
"grad_norm": 0.27147871255874634, |
|
"learning_rate": 3.8222105771700725e-05, |
|
"loss": 0.0162, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 5.453900709219858, |
|
"grad_norm": 0.18269909918308258, |
|
"learning_rate": 3.7976450873174005e-05, |
|
"loss": 0.0134, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 5.460992907801418, |
|
"grad_norm": 0.1839030236005783, |
|
"learning_rate": 3.7731402770490654e-05, |
|
"loss": 0.0122, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 5.468085106382979, |
|
"grad_norm": 0.20004524290561676, |
|
"learning_rate": 3.748696386103313e-05, |
|
"loss": 0.0137, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 5.475177304964539, |
|
"grad_norm": 0.19872575998306274, |
|
"learning_rate": 3.724313653622404e-05, |
|
"loss": 0.0191, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 5.4822695035460995, |
|
"grad_norm": 0.23203954100608826, |
|
"learning_rate": 3.699992318150256e-05, |
|
"loss": 0.0146, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 5.48936170212766, |
|
"grad_norm": 0.3198186159133911, |
|
"learning_rate": 3.675732617630132e-05, |
|
"loss": 0.011, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 5.49645390070922, |
|
"grad_norm": 0.195682555437088, |
|
"learning_rate": 3.6515347894022914e-05, |
|
"loss": 0.0166, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 5.50354609929078, |
|
"grad_norm": 0.25143593549728394, |
|
"learning_rate": 3.627399070201676e-05, |
|
"loss": 0.0155, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 5.51063829787234, |
|
"grad_norm": 0.19740962982177734, |
|
"learning_rate": 3.603325696155605e-05, |
|
"loss": 0.0107, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 5.5177304964539005, |
|
"grad_norm": 0.1158263236284256, |
|
"learning_rate": 3.579314902781458e-05, |
|
"loss": 0.0162, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 5.524822695035461, |
|
"grad_norm": 0.29272812604904175, |
|
"learning_rate": 3.555366924984346e-05, |
|
"loss": 0.0199, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 5.531914893617021, |
|
"grad_norm": 0.13803668320178986, |
|
"learning_rate": 3.531481997054861e-05, |
|
"loss": 0.0105, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 5.539007092198582, |
|
"grad_norm": 0.16399915516376495, |
|
"learning_rate": 3.5076603526667404e-05, |
|
"loss": 0.0115, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 5.546099290780142, |
|
"grad_norm": 0.23237183690071106, |
|
"learning_rate": 3.4839022248746136e-05, |
|
"loss": 0.0152, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 5.553191489361702, |
|
"grad_norm": 0.22134317457675934, |
|
"learning_rate": 3.460207846111697e-05, |
|
"loss": 0.0128, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 5.560283687943262, |
|
"grad_norm": 0.14723555743694305, |
|
"learning_rate": 3.436577448187529e-05, |
|
"loss": 0.0126, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 5.567375886524823, |
|
"grad_norm": 0.17128515243530273, |
|
"learning_rate": 3.41301126228571e-05, |
|
"loss": 0.0145, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 5.574468085106383, |
|
"grad_norm": 0.20742635428905487, |
|
"learning_rate": 3.389509518961637e-05, |
|
"loss": 0.0151, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 5.581560283687943, |
|
"grad_norm": 0.16497284173965454, |
|
"learning_rate": 3.3660724481402326e-05, |
|
"loss": 0.0151, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 5.588652482269503, |
|
"grad_norm": 0.24318110942840576, |
|
"learning_rate": 3.3427002791137164e-05, |
|
"loss": 0.0159, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 5.595744680851064, |
|
"grad_norm": 0.18855370581150055, |
|
"learning_rate": 3.319393240539355e-05, |
|
"loss": 0.015, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 5.602836879432624, |
|
"grad_norm": 0.14175598323345184, |
|
"learning_rate": 3.296151560437214e-05, |
|
"loss": 0.0181, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 5.609929078014185, |
|
"grad_norm": 0.27144867181777954, |
|
"learning_rate": 3.272975466187951e-05, |
|
"loss": 0.0148, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 5.617021276595745, |
|
"grad_norm": 0.2221544086933136, |
|
"learning_rate": 3.249865184530563e-05, |
|
"loss": 0.0129, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 5.624113475177305, |
|
"grad_norm": 0.2543604075908661, |
|
"learning_rate": 3.226820941560186e-05, |
|
"loss": 0.014, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 5.631205673758865, |
|
"grad_norm": 0.10194625705480576, |
|
"learning_rate": 3.2038429627258845e-05, |
|
"loss": 0.0138, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 5.638297872340425, |
|
"grad_norm": 0.18883180618286133, |
|
"learning_rate": 3.180931472828435e-05, |
|
"loss": 0.0143, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 5.6453900709219855, |
|
"grad_norm": 0.1743205338716507, |
|
"learning_rate": 3.158086696018126e-05, |
|
"loss": 0.0128, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 5.652482269503546, |
|
"grad_norm": 0.2825423777103424, |
|
"learning_rate": 3.135308855792587e-05, |
|
"loss": 0.015, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 5.659574468085106, |
|
"grad_norm": 0.3754797577857971, |
|
"learning_rate": 3.1125981749945686e-05, |
|
"loss": 0.0117, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 5.666666666666667, |
|
"grad_norm": 0.19825135171413422, |
|
"learning_rate": 3.089954875809794e-05, |
|
"loss": 0.0129, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 5.673758865248227, |
|
"grad_norm": 0.33323025703430176, |
|
"learning_rate": 3.06737917976476e-05, |
|
"loss": 0.0155, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 5.680851063829787, |
|
"grad_norm": 0.09978597611188889, |
|
"learning_rate": 3.0448713077245838e-05, |
|
"loss": 0.0136, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 5.6879432624113475, |
|
"grad_norm": 0.14637133479118347, |
|
"learning_rate": 3.0224314798908414e-05, |
|
"loss": 0.0164, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 5.695035460992908, |
|
"grad_norm": 0.11906511336565018, |
|
"learning_rate": 3.0000599157994148e-05, |
|
"loss": 0.0134, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 5.702127659574468, |
|
"grad_norm": 0.21006911993026733, |
|
"learning_rate": 2.9777568343183303e-05, |
|
"loss": 0.0128, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 5.709219858156028, |
|
"grad_norm": 0.24388642609119415, |
|
"learning_rate": 2.955522453645635e-05, |
|
"loss": 0.0148, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 5.716312056737589, |
|
"grad_norm": 0.40704119205474854, |
|
"learning_rate": 2.9333569913072466e-05, |
|
"loss": 0.0142, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 5.723404255319149, |
|
"grad_norm": 0.24392291903495789, |
|
"learning_rate": 2.9112606641548436e-05, |
|
"loss": 0.0133, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 5.7304964539007095, |
|
"grad_norm": 0.22407054901123047, |
|
"learning_rate": 2.8892336883637327e-05, |
|
"loss": 0.0133, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 5.73758865248227, |
|
"grad_norm": 0.20735864341259003, |
|
"learning_rate": 2.8672762794307173e-05, |
|
"loss": 0.0108, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 5.74468085106383, |
|
"grad_norm": 0.20651739835739136, |
|
"learning_rate": 2.8453886521720264e-05, |
|
"loss": 0.0155, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 5.75177304964539, |
|
"grad_norm": 0.1309831440448761, |
|
"learning_rate": 2.8235710207211874e-05, |
|
"loss": 0.0171, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 5.75886524822695, |
|
"grad_norm": 0.29270994663238525, |
|
"learning_rate": 2.8018235985269325e-05, |
|
"loss": 0.0179, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 5.76595744680851, |
|
"grad_norm": 0.3431891202926636, |
|
"learning_rate": 2.7801465983511143e-05, |
|
"loss": 0.0156, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 5.773049645390071, |
|
"grad_norm": 0.12922121584415436, |
|
"learning_rate": 2.7585402322666333e-05, |
|
"loss": 0.0196, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 5.780141843971631, |
|
"grad_norm": 0.19691234827041626, |
|
"learning_rate": 2.737004711655342e-05, |
|
"loss": 0.0116, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 5.787234042553192, |
|
"grad_norm": 0.17019961774349213, |
|
"learning_rate": 2.7155402472060043e-05, |
|
"loss": 0.0145, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 5.794326241134752, |
|
"grad_norm": 0.18791693449020386, |
|
"learning_rate": 2.6941470489122056e-05, |
|
"loss": 0.0166, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 5.801418439716312, |
|
"grad_norm": 0.1223282665014267, |
|
"learning_rate": 2.6728253260703163e-05, |
|
"loss": 0.0117, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 5.808510638297872, |
|
"grad_norm": 0.16401457786560059, |
|
"learning_rate": 2.6515752872774458e-05, |
|
"loss": 0.0147, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 5.815602836879433, |
|
"grad_norm": 0.13671791553497314, |
|
"learning_rate": 2.6303971404293882e-05, |
|
"loss": 0.0128, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 5.822695035460993, |
|
"grad_norm": 0.21030209958553314, |
|
"learning_rate": 2.609291092718604e-05, |
|
"loss": 0.0157, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 5.829787234042553, |
|
"grad_norm": 0.19939203560352325, |
|
"learning_rate": 2.5882573506321772e-05, |
|
"loss": 0.0139, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 5.836879432624113, |
|
"grad_norm": 0.26017311215400696, |
|
"learning_rate": 2.5672961199498058e-05, |
|
"loss": 0.0133, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 5.843971631205674, |
|
"grad_norm": 0.29861557483673096, |
|
"learning_rate": 2.5464076057417883e-05, |
|
"loss": 0.0154, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 5.851063829787234, |
|
"grad_norm": 0.17012879252433777, |
|
"learning_rate": 2.5255920123670196e-05, |
|
"loss": 0.0163, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 5.858156028368795, |
|
"grad_norm": 0.20148979127407074, |
|
"learning_rate": 2.5048495434709708e-05, |
|
"loss": 0.0117, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 5.865248226950355, |
|
"grad_norm": 0.2007923573255539, |
|
"learning_rate": 2.4841804019837323e-05, |
|
"loss": 0.0146, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 5.872340425531915, |
|
"grad_norm": 0.1671822965145111, |
|
"learning_rate": 2.4635847901179932e-05, |
|
"loss": 0.0174, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 5.879432624113475, |
|
"grad_norm": 0.22692982852458954, |
|
"learning_rate": 2.4430629093670963e-05, |
|
"loss": 0.0173, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 5.886524822695035, |
|
"grad_norm": 0.142043337225914, |
|
"learning_rate": 2.4226149605030344e-05, |
|
"loss": 0.0099, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 5.8936170212765955, |
|
"grad_norm": 0.1621280163526535, |
|
"learning_rate": 2.4022411435745074e-05, |
|
"loss": 0.0111, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 5.900709219858156, |
|
"grad_norm": 0.21993504464626312, |
|
"learning_rate": 2.3819416579049603e-05, |
|
"loss": 0.01, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 5.907801418439716, |
|
"grad_norm": 0.2549281716346741, |
|
"learning_rate": 2.361716702090634e-05, |
|
"loss": 0.0156, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 5.914893617021277, |
|
"grad_norm": 0.13125896453857422, |
|
"learning_rate": 2.3415664739986165e-05, |
|
"loss": 0.0179, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 5.921985815602837, |
|
"grad_norm": 0.2061365842819214, |
|
"learning_rate": 2.321491170764908e-05, |
|
"loss": 0.0149, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 5.929078014184397, |
|
"grad_norm": 0.2736816108226776, |
|
"learning_rate": 2.3014909887925042e-05, |
|
"loss": 0.0186, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 5.9361702127659575, |
|
"grad_norm": 0.1882363259792328, |
|
"learning_rate": 2.281566123749458e-05, |
|
"loss": 0.0211, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 5.943262411347518, |
|
"grad_norm": 0.2734488844871521, |
|
"learning_rate": 2.2617167705669827e-05, |
|
"loss": 0.0132, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 5.950354609929078, |
|
"grad_norm": 0.20115582644939423, |
|
"learning_rate": 2.2419431234375178e-05, |
|
"loss": 0.0121, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 5.957446808510638, |
|
"grad_norm": 0.1607801914215088, |
|
"learning_rate": 2.2222453758128648e-05, |
|
"loss": 0.0128, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 5.964539007092198, |
|
"grad_norm": 0.20927660167217255, |
|
"learning_rate": 2.2026237204022716e-05, |
|
"loss": 0.0097, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 5.971631205673759, |
|
"grad_norm": 0.15127459168434143, |
|
"learning_rate": 2.1830783491705477e-05, |
|
"loss": 0.0096, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 5.9787234042553195, |
|
"grad_norm": 0.25664299726486206, |
|
"learning_rate": 2.1636094533361896e-05, |
|
"loss": 0.0135, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 5.98581560283688, |
|
"grad_norm": 0.1857176572084427, |
|
"learning_rate": 2.14421722336952e-05, |
|
"loss": 0.0131, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 5.99290780141844, |
|
"grad_norm": 0.11627791076898575, |
|
"learning_rate": 2.1249018489908056e-05, |
|
"loss": 0.0101, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.42432013154029846, |
|
"learning_rate": 2.1056635191684183e-05, |
|
"loss": 0.0128, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 6.00709219858156, |
|
"grad_norm": 0.21808061003684998, |
|
"learning_rate": 2.086502422116974e-05, |
|
"loss": 0.0136, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 6.01418439716312, |
|
"grad_norm": 0.2082509845495224, |
|
"learning_rate": 2.067418745295494e-05, |
|
"loss": 0.0171, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 6.0212765957446805, |
|
"grad_norm": 0.1480574756860733, |
|
"learning_rate": 2.0484126754055842e-05, |
|
"loss": 0.0125, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 6.028368794326241, |
|
"grad_norm": 0.21675336360931396, |
|
"learning_rate": 2.0294843983895828e-05, |
|
"loss": 0.0148, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 6.035460992907802, |
|
"grad_norm": 0.22429972887039185, |
|
"learning_rate": 2.0106340994287698e-05, |
|
"loss": 0.018, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 6.042553191489362, |
|
"grad_norm": 0.18754935264587402, |
|
"learning_rate": 1.9918619629415314e-05, |
|
"loss": 0.0116, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 6.049645390070922, |
|
"grad_norm": 0.1608024388551712, |
|
"learning_rate": 1.9731681725815676e-05, |
|
"loss": 0.0083, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 6.056737588652482, |
|
"grad_norm": 0.19191201031208038, |
|
"learning_rate": 1.9545529112361005e-05, |
|
"loss": 0.0173, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 6.0638297872340425, |
|
"grad_norm": 0.18507297337055206, |
|
"learning_rate": 1.93601636102407e-05, |
|
"loss": 0.0095, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 6.070921985815603, |
|
"grad_norm": 0.18908648192882538, |
|
"learning_rate": 1.917558703294361e-05, |
|
"loss": 0.0129, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 6.078014184397163, |
|
"grad_norm": 0.23118513822555542, |
|
"learning_rate": 1.8991801186240342e-05, |
|
"loss": 0.0089, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 6.085106382978723, |
|
"grad_norm": 0.24339988827705383, |
|
"learning_rate": 1.8808807868165512e-05, |
|
"loss": 0.0116, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 6.092198581560283, |
|
"grad_norm": 0.26911652088165283, |
|
"learning_rate": 1.862660886900016e-05, |
|
"loss": 0.0145, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 6.099290780141844, |
|
"grad_norm": 0.15160147845745087, |
|
"learning_rate": 1.8445205971254243e-05, |
|
"loss": 0.0101, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 6.1063829787234045, |
|
"grad_norm": 0.1248091533780098, |
|
"learning_rate": 1.826460094964928e-05, |
|
"loss": 0.0143, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 6.113475177304965, |
|
"grad_norm": 0.19379976391792297, |
|
"learning_rate": 1.808479557110081e-05, |
|
"loss": 0.0112, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 6.120567375886525, |
|
"grad_norm": 0.16450917720794678, |
|
"learning_rate": 1.7905791594701337e-05, |
|
"loss": 0.0202, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 6.127659574468085, |
|
"grad_norm": 0.3321121335029602, |
|
"learning_rate": 1.7727590771702894e-05, |
|
"loss": 0.0107, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 6.134751773049645, |
|
"grad_norm": 0.1483653485774994, |
|
"learning_rate": 1.7550194845500025e-05, |
|
"loss": 0.0123, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 6.141843971631205, |
|
"grad_norm": 0.17642395198345184, |
|
"learning_rate": 1.7373605551612805e-05, |
|
"loss": 0.009, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 6.148936170212766, |
|
"grad_norm": 0.24330726265907288, |
|
"learning_rate": 1.7197824617669655e-05, |
|
"loss": 0.0113, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 6.156028368794326, |
|
"grad_norm": 0.2407854199409485, |
|
"learning_rate": 1.7022853763390623e-05, |
|
"loss": 0.013, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 6.163120567375887, |
|
"grad_norm": 0.237819641828537, |
|
"learning_rate": 1.68486947005705e-05, |
|
"loss": 0.013, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 6.170212765957447, |
|
"grad_norm": 0.16382509469985962, |
|
"learning_rate": 1.6675349133062e-05, |
|
"loss": 0.0128, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 6.177304964539007, |
|
"grad_norm": 0.2901313602924347, |
|
"learning_rate": 1.6502818756759276e-05, |
|
"loss": 0.0085, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 6.184397163120567, |
|
"grad_norm": 0.23529794812202454, |
|
"learning_rate": 1.633110525958108e-05, |
|
"loss": 0.009, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 6.191489361702128, |
|
"grad_norm": 0.21968472003936768, |
|
"learning_rate": 1.616021032145444e-05, |
|
"loss": 0.0158, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 6.198581560283688, |
|
"grad_norm": 0.21688121557235718, |
|
"learning_rate": 1.5990135614298184e-05, |
|
"loss": 0.0114, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 6.205673758865248, |
|
"grad_norm": 0.19126644730567932, |
|
"learning_rate": 1.582088280200652e-05, |
|
"loss": 0.0111, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 6.212765957446808, |
|
"grad_norm": 0.2854389548301697, |
|
"learning_rate": 1.5652453540432856e-05, |
|
"loss": 0.0124, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 6.219858156028369, |
|
"grad_norm": 0.23691484332084656, |
|
"learning_rate": 1.5484849477373463e-05, |
|
"loss": 0.0163, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 6.226950354609929, |
|
"grad_norm": 0.10352014750242233, |
|
"learning_rate": 1.5318072252551498e-05, |
|
"loss": 0.0121, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 6.23404255319149, |
|
"grad_norm": 0.1918143928050995, |
|
"learning_rate": 1.5152123497600879e-05, |
|
"loss": 0.0166, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 6.24113475177305, |
|
"grad_norm": 0.27419474720954895, |
|
"learning_rate": 1.49870048360504e-05, |
|
"loss": 0.0176, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 6.24822695035461, |
|
"grad_norm": 0.1279487907886505, |
|
"learning_rate": 1.4822717883307658e-05, |
|
"loss": 0.0165, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 6.25531914893617, |
|
"grad_norm": 0.20851808786392212, |
|
"learning_rate": 1.46592642466435e-05, |
|
"loss": 0.0111, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 6.26241134751773, |
|
"grad_norm": 0.20219635963439941, |
|
"learning_rate": 1.4496645525176166e-05, |
|
"loss": 0.0105, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 6.2695035460992905, |
|
"grad_norm": 0.17594772577285767, |
|
"learning_rate": 1.4334863309855617e-05, |
|
"loss": 0.02, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 6.276595744680851, |
|
"grad_norm": 0.1454489380121231, |
|
"learning_rate": 1.4173919183448026e-05, |
|
"loss": 0.0147, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 6.283687943262412, |
|
"grad_norm": 0.36037492752075195, |
|
"learning_rate": 1.4013814720520258e-05, |
|
"loss": 0.0104, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 6.290780141843972, |
|
"grad_norm": 0.29365283250808716, |
|
"learning_rate": 1.385455148742455e-05, |
|
"loss": 0.0169, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 6.297872340425532, |
|
"grad_norm": 0.2135290801525116, |
|
"learning_rate": 1.36961310422831e-05, |
|
"loss": 0.0183, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 6.304964539007092, |
|
"grad_norm": 0.27943718433380127, |
|
"learning_rate": 1.3538554934972813e-05, |
|
"loss": 0.0148, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 6.3120567375886525, |
|
"grad_norm": 0.18066227436065674, |
|
"learning_rate": 1.3381824707110157e-05, |
|
"loss": 0.0115, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 6.319148936170213, |
|
"grad_norm": 0.087078757584095, |
|
"learning_rate": 1.3225941892036198e-05, |
|
"loss": 0.0121, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 6.326241134751773, |
|
"grad_norm": 0.26353609561920166, |
|
"learning_rate": 1.3070908014801375e-05, |
|
"loss": 0.0087, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 6.333333333333333, |
|
"grad_norm": 0.11882209032773972, |
|
"learning_rate": 1.2916724592150798e-05, |
|
"loss": 0.0088, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 6.340425531914893, |
|
"grad_norm": 0.17999006807804108, |
|
"learning_rate": 1.276339313250925e-05, |
|
"loss": 0.0094, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 6.347517730496454, |
|
"grad_norm": 0.16146190464496613, |
|
"learning_rate": 1.2610915135966495e-05, |
|
"loss": 0.0112, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 6.3546099290780145, |
|
"grad_norm": 0.26943373680114746, |
|
"learning_rate": 1.2459292094262664e-05, |
|
"loss": 0.014, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 6.361702127659575, |
|
"grad_norm": 0.22631198167800903, |
|
"learning_rate": 1.2308525490773526e-05, |
|
"loss": 0.0103, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 6.368794326241135, |
|
"grad_norm": 0.25705012679100037, |
|
"learning_rate": 1.2158616800496059e-05, |
|
"loss": 0.0152, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 6.375886524822695, |
|
"grad_norm": 0.23096348345279694, |
|
"learning_rate": 1.2009567490034046e-05, |
|
"loss": 0.0128, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 6.382978723404255, |
|
"grad_norm": 0.19161191582679749, |
|
"learning_rate": 1.186137901758364e-05, |
|
"loss": 0.0142, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 6.390070921985815, |
|
"grad_norm": 0.17679370939731598, |
|
"learning_rate": 1.1714052832919187e-05, |
|
"loss": 0.0163, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 6.397163120567376, |
|
"grad_norm": 0.1845289170742035, |
|
"learning_rate": 1.1567590377378979e-05, |
|
"loss": 0.0137, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 6.404255319148936, |
|
"grad_norm": 0.10619324445724487, |
|
"learning_rate": 1.1421993083851145e-05, |
|
"loss": 0.0119, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 6.411347517730497, |
|
"grad_norm": 0.17025884985923767, |
|
"learning_rate": 1.1277262376759712e-05, |
|
"loss": 0.0147, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 6.418439716312057, |
|
"grad_norm": 0.24711932241916656, |
|
"learning_rate": 1.1133399672050638e-05, |
|
"loss": 0.0128, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 6.425531914893617, |
|
"grad_norm": 0.24352525174617767, |
|
"learning_rate": 1.0990406377177865e-05, |
|
"loss": 0.0163, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 6.432624113475177, |
|
"grad_norm": 0.11696803569793701, |
|
"learning_rate": 1.0848283891089683e-05, |
|
"loss": 0.0108, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 6.439716312056738, |
|
"grad_norm": 0.14711810648441315, |
|
"learning_rate": 1.0707033604214944e-05, |
|
"loss": 0.0091, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 6.446808510638298, |
|
"grad_norm": 0.08239645510911942, |
|
"learning_rate": 1.0566656898449546e-05, |
|
"loss": 0.0119, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 6.453900709219858, |
|
"grad_norm": 0.1815529316663742, |
|
"learning_rate": 1.0427155147142887e-05, |
|
"loss": 0.0122, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 6.460992907801418, |
|
"grad_norm": 0.16181941330432892, |
|
"learning_rate": 1.0288529715084293e-05, |
|
"loss": 0.0091, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 6.468085106382979, |
|
"grad_norm": 0.1356610655784607, |
|
"learning_rate": 1.0150781958489919e-05, |
|
"loss": 0.0115, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 6.475177304964539, |
|
"grad_norm": 0.10379713028669357, |
|
"learning_rate": 1.0013913224989303e-05, |
|
"loss": 0.0152, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 6.4822695035460995, |
|
"grad_norm": 0.1019928902387619, |
|
"learning_rate": 9.877924853612186e-06, |
|
"loss": 0.011, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 6.48936170212766, |
|
"grad_norm": 0.2234257161617279, |
|
"learning_rate": 9.74281817477547e-06, |
|
"loss": 0.0108, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 6.49645390070922, |
|
"grad_norm": 0.2257377654314041, |
|
"learning_rate": 9.608594510270218e-06, |
|
"loss": 0.0111, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 6.50354609929078, |
|
"grad_norm": 0.1945466548204422, |
|
"learning_rate": 9.47525517324862e-06, |
|
"loss": 0.0106, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 6.51063829787234, |
|
"grad_norm": 0.39022570848464966, |
|
"learning_rate": 9.342801468211283e-06, |
|
"loss": 0.0112, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 6.5177304964539005, |
|
"grad_norm": 0.16999700665473938, |
|
"learning_rate": 9.211234690994364e-06, |
|
"loss": 0.0161, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 6.524822695035461, |
|
"grad_norm": 0.38921093940734863, |
|
"learning_rate": 9.080556128756901e-06, |
|
"loss": 0.0107, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 6.531914893617021, |
|
"grad_norm": 0.26619699597358704, |
|
"learning_rate": 8.950767059968302e-06, |
|
"loss": 0.0164, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 6.539007092198582, |
|
"grad_norm": 0.19151495397090912, |
|
"learning_rate": 8.821868754395734e-06, |
|
"loss": 0.0111, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 6.546099290780142, |
|
"grad_norm": 0.09225843101739883, |
|
"learning_rate": 8.693862473091785e-06, |
|
"loss": 0.0113, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 6.553191489361702, |
|
"grad_norm": 0.2490764856338501, |
|
"learning_rate": 8.566749468382074e-06, |
|
"loss": 0.0163, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 6.560283687943262, |
|
"grad_norm": 0.2187177836894989, |
|
"learning_rate": 8.440530983852978e-06, |
|
"loss": 0.0132, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 6.567375886524823, |
|
"grad_norm": 0.12911982834339142, |
|
"learning_rate": 8.315208254339557e-06, |
|
"loss": 0.011, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 6.574468085106383, |
|
"grad_norm": 0.20145867764949799, |
|
"learning_rate": 8.190782505913442e-06, |
|
"loss": 0.0134, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 6.581560283687943, |
|
"grad_norm": 0.2898944914340973, |
|
"learning_rate": 8.067254955870707e-06, |
|
"loss": 0.017, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 6.588652482269503, |
|
"grad_norm": 0.08676121383905411, |
|
"learning_rate": 7.944626812720169e-06, |
|
"loss": 0.0096, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 6.595744680851064, |
|
"grad_norm": 0.2594836950302124, |
|
"learning_rate": 7.822899276171403e-06, |
|
"loss": 0.0156, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 6.602836879432624, |
|
"grad_norm": 0.15736845135688782, |
|
"learning_rate": 7.702073537123145e-06, |
|
"loss": 0.0109, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 6.609929078014185, |
|
"grad_norm": 0.25035277009010315, |
|
"learning_rate": 7.5821507776514866e-06, |
|
"loss": 0.0261, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 6.617021276595745, |
|
"grad_norm": 0.361659973859787, |
|
"learning_rate": 7.463132170998388e-06, |
|
"loss": 0.0117, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 6.624113475177305, |
|
"grad_norm": 0.21234659850597382, |
|
"learning_rate": 7.345018881560251e-06, |
|
"loss": 0.0114, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 6.631205673758865, |
|
"grad_norm": 0.1619425266981125, |
|
"learning_rate": 7.227812064876471e-06, |
|
"loss": 0.0095, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 6.638297872340425, |
|
"grad_norm": 0.15260903537273407, |
|
"learning_rate": 7.1115128676180975e-06, |
|
"loss": 0.0129, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 6.6453900709219855, |
|
"grad_norm": 0.13201937079429626, |
|
"learning_rate": 6.996122427576635e-06, |
|
"loss": 0.0216, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 6.652482269503546, |
|
"grad_norm": 0.17256666719913483, |
|
"learning_rate": 6.881641873653022e-06, |
|
"loss": 0.0079, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 6.659574468085106, |
|
"grad_norm": 0.3167516589164734, |
|
"learning_rate": 6.768072325846387e-06, |
|
"loss": 0.0187, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"grad_norm": 0.17999926209449768, |
|
"learning_rate": 6.655414895243306e-06, |
|
"loss": 0.0135, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 6.673758865248227, |
|
"grad_norm": 0.20833925902843475, |
|
"learning_rate": 6.543670684006742e-06, |
|
"loss": 0.0117, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 6.680851063829787, |
|
"grad_norm": 0.19959613680839539, |
|
"learning_rate": 6.432840785365368e-06, |
|
"loss": 0.0113, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 6.6879432624113475, |
|
"grad_norm": 0.16003111004829407, |
|
"learning_rate": 6.3229262836028924e-06, |
|
"loss": 0.013, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 6.695035460992908, |
|
"grad_norm": 0.295195609331131, |
|
"learning_rate": 6.213928254047352e-06, |
|
"loss": 0.0137, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 6.702127659574468, |
|
"grad_norm": 0.26150402426719666, |
|
"learning_rate": 6.105847763060668e-06, |
|
"loss": 0.0152, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 6.709219858156028, |
|
"grad_norm": 0.10643389075994492, |
|
"learning_rate": 5.998685868028231e-06, |
|
"loss": 0.0076, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 6.716312056737589, |
|
"grad_norm": 0.1366465985774994, |
|
"learning_rate": 5.892443617348431e-06, |
|
"loss": 0.0107, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 6.723404255319149, |
|
"grad_norm": 0.22469396889209747, |
|
"learning_rate": 5.7871220504226e-06, |
|
"loss": 0.0099, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 6.7304964539007095, |
|
"grad_norm": 0.17117497324943542, |
|
"learning_rate": 5.682722197644652e-06, |
|
"loss": 0.0108, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 6.73758865248227, |
|
"grad_norm": 0.21108581125736237, |
|
"learning_rate": 5.579245080391094e-06, |
|
"loss": 0.0083, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 6.74468085106383, |
|
"grad_norm": 0.1274946630001068, |
|
"learning_rate": 5.47669171101105e-06, |
|
"loss": 0.0141, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 6.75177304964539, |
|
"grad_norm": 0.22684040665626526, |
|
"learning_rate": 5.375063092816313e-06, |
|
"loss": 0.0107, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 6.75886524822695, |
|
"grad_norm": 0.12946170568466187, |
|
"learning_rate": 5.2743602200715505e-06, |
|
"loss": 0.0124, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 6.76595744680851, |
|
"grad_norm": 0.19293436408042908, |
|
"learning_rate": 5.1745840779845455e-06, |
|
"loss": 0.0103, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 6.773049645390071, |
|
"grad_norm": 0.17891868948936462, |
|
"learning_rate": 5.075735642696611e-06, |
|
"loss": 0.0172, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 6.780141843971631, |
|
"grad_norm": 0.15858317911624908, |
|
"learning_rate": 4.977815881273018e-06, |
|
"loss": 0.0094, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 6.787234042553192, |
|
"grad_norm": 0.1298726350069046, |
|
"learning_rate": 4.880825751693518e-06, |
|
"loss": 0.0129, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 6.794326241134752, |
|
"grad_norm": 0.14093485474586487, |
|
"learning_rate": 4.784766202842961e-06, |
|
"loss": 0.0063, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 6.801418439716312, |
|
"grad_norm": 0.3153584599494934, |
|
"learning_rate": 4.689638174502076e-06, |
|
"loss": 0.0099, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 6.808510638297872, |
|
"grad_norm": 0.16759419441223145, |
|
"learning_rate": 4.595442597338217e-06, |
|
"loss": 0.0163, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 6.815602836879433, |
|
"grad_norm": 0.2294328510761261, |
|
"learning_rate": 4.502180392896272e-06, |
|
"loss": 0.0084, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 6.822695035460993, |
|
"grad_norm": 0.25540658831596375, |
|
"learning_rate": 4.409852473589626e-06, |
|
"loss": 0.0109, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 6.829787234042553, |
|
"grad_norm": 0.16674602031707764, |
|
"learning_rate": 4.318459742691316e-06, |
|
"loss": 0.0098, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 6.836879432624113, |
|
"grad_norm": 0.18922989070415497, |
|
"learning_rate": 4.228003094325084e-06, |
|
"loss": 0.0077, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 6.843971631205674, |
|
"grad_norm": 0.12138693034648895, |
|
"learning_rate": 4.13848341345674e-06, |
|
"loss": 0.012, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 6.851063829787234, |
|
"grad_norm": 0.2116478681564331, |
|
"learning_rate": 4.049901575885373e-06, |
|
"loss": 0.0114, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 6.858156028368795, |
|
"grad_norm": 0.22467593848705292, |
|
"learning_rate": 3.962258448234912e-06, |
|
"loss": 0.0211, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 6.865248226950355, |
|
"grad_norm": 0.13237184286117554, |
|
"learning_rate": 3.875554887945576e-06, |
|
"loss": 0.0195, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 6.872340425531915, |
|
"grad_norm": 0.15521298348903656, |
|
"learning_rate": 3.789791743265503e-06, |
|
"loss": 0.0107, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 6.879432624113475, |
|
"grad_norm": 0.23321086168289185, |
|
"learning_rate": 3.704969853242446e-06, |
|
"loss": 0.0177, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 6.886524822695035, |
|
"grad_norm": 0.1563076227903366, |
|
"learning_rate": 3.6210900477155696e-06, |
|
"loss": 0.0094, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 6.8936170212765955, |
|
"grad_norm": 0.12247644364833832, |
|
"learning_rate": 3.5381531473073326e-06, |
|
"loss": 0.016, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 6.900709219858156, |
|
"grad_norm": 0.19630227982997894, |
|
"learning_rate": 3.456159963415473e-06, |
|
"loss": 0.0108, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 6.907801418439716, |
|
"grad_norm": 0.25300222635269165, |
|
"learning_rate": 3.3751112982050135e-06, |
|
"loss": 0.0112, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 6.914893617021277, |
|
"grad_norm": 0.17951223254203796, |
|
"learning_rate": 3.295007944600481e-06, |
|
"loss": 0.0158, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 6.921985815602837, |
|
"grad_norm": 0.09103976935148239, |
|
"learning_rate": 3.215850686278132e-06, |
|
"loss": 0.0127, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 6.929078014184397, |
|
"grad_norm": 0.13495738804340363, |
|
"learning_rate": 3.1376402976582507e-06, |
|
"loss": 0.008, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 6.9361702127659575, |
|
"grad_norm": 0.12648430466651917, |
|
"learning_rate": 3.060377543897619e-06, |
|
"loss": 0.0106, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 6.943262411347518, |
|
"grad_norm": 0.22089631855487823, |
|
"learning_rate": 2.984063180882013e-06, |
|
"loss": 0.0137, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 6.950354609929078, |
|
"grad_norm": 0.12765297293663025, |
|
"learning_rate": 2.908697955218753e-06, |
|
"loss": 0.0088, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 6.957446808510638, |
|
"grad_norm": 0.33271655440330505, |
|
"learning_rate": 2.834282604229521e-06, |
|
"loss": 0.0111, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 6.964539007092198, |
|
"grad_norm": 0.2782209515571594, |
|
"learning_rate": 2.7608178559430653e-06, |
|
"loss": 0.0102, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 6.971631205673759, |
|
"grad_norm": 0.21345843374729156, |
|
"learning_rate": 2.6883044290880178e-06, |
|
"loss": 0.0105, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 6.9787234042553195, |
|
"grad_norm": 0.10163545608520508, |
|
"learning_rate": 2.616743033086022e-06, |
|
"loss": 0.0081, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 6.98581560283688, |
|
"grad_norm": 0.13964958488941193, |
|
"learning_rate": 2.5461343680446727e-06, |
|
"loss": 0.0121, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 6.99290780141844, |
|
"grad_norm": 0.13182148337364197, |
|
"learning_rate": 2.476479124750697e-06, |
|
"loss": 0.0128, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.24027769267559052, |
|
"learning_rate": 2.4077779846631732e-06, |
|
"loss": 0.0111, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 7.00709219858156, |
|
"grad_norm": 0.3481847941875458, |
|
"learning_rate": 2.3400316199069238e-06, |
|
"loss": 0.0103, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 7.01418439716312, |
|
"grad_norm": 0.19290268421173096, |
|
"learning_rate": 2.273240693265899e-06, |
|
"loss": 0.014, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 7.0212765957446805, |
|
"grad_norm": 0.059760428965091705, |
|
"learning_rate": 2.207405858176692e-06, |
|
"loss": 0.0111, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 7.028368794326241, |
|
"grad_norm": 0.2962135374546051, |
|
"learning_rate": 2.142527758722157e-06, |
|
"loss": 0.0125, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 7.035460992907802, |
|
"grad_norm": 0.06278011202812195, |
|
"learning_rate": 2.0786070296250793e-06, |
|
"loss": 0.0076, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 7.042553191489362, |
|
"grad_norm": 0.06869203597307205, |
|
"learning_rate": 2.0156442962420252e-06, |
|
"loss": 0.0101, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 7.049645390070922, |
|
"grad_norm": 0.250283420085907, |
|
"learning_rate": 1.95364017455717e-06, |
|
"loss": 0.0195, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 7.056737588652482, |
|
"grad_norm": 0.1912689059972763, |
|
"learning_rate": 1.8925952711763006e-06, |
|
"loss": 0.0138, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 7.0638297872340425, |
|
"grad_norm": 0.120729461312294, |
|
"learning_rate": 1.8325101833208457e-06, |
|
"loss": 0.0158, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 7.070921985815603, |
|
"grad_norm": 0.2449929565191269, |
|
"learning_rate": 1.7733854988220778e-06, |
|
"loss": 0.0111, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 7.078014184397163, |
|
"grad_norm": 0.11768271028995514, |
|
"learning_rate": 1.7152217961153405e-06, |
|
"loss": 0.011, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 7.085106382978723, |
|
"grad_norm": 0.09861616790294647, |
|
"learning_rate": 1.6580196442343987e-06, |
|
"loss": 0.0097, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 7.092198581560283, |
|
"grad_norm": 0.12583206593990326, |
|
"learning_rate": 1.601779602805842e-06, |
|
"loss": 0.0089, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 7.099290780141844, |
|
"grad_norm": 0.13632884621620178, |
|
"learning_rate": 1.5465022220436442e-06, |
|
"loss": 0.01, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 7.1063829787234045, |
|
"grad_norm": 0.19436778128147125, |
|
"learning_rate": 1.4921880427437584e-06, |
|
"loss": 0.0166, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 7.113475177304965, |
|
"grad_norm": 0.10597945749759674, |
|
"learning_rate": 1.4388375962788637e-06, |
|
"loss": 0.0091, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 7.120567375886525, |
|
"grad_norm": 0.08205860108137131, |
|
"learning_rate": 1.3864514045931032e-06, |
|
"loss": 0.0105, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 7.127659574468085, |
|
"grad_norm": 0.19386780261993408, |
|
"learning_rate": 1.3350299801970335e-06, |
|
"loss": 0.0149, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 7.134751773049645, |
|
"grad_norm": 0.22453084588050842, |
|
"learning_rate": 1.2845738261625828e-06, |
|
"loss": 0.0116, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 7.141843971631205, |
|
"grad_norm": 0.1868603378534317, |
|
"learning_rate": 1.235083436118145e-06, |
|
"loss": 0.0089, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 7.148936170212766, |
|
"grad_norm": 0.12683051824569702, |
|
"learning_rate": 1.1865592942437275e-06, |
|
"loss": 0.0102, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 7.156028368794326, |
|
"grad_norm": 0.17270110547542572, |
|
"learning_rate": 1.1390018752662436e-06, |
|
"loss": 0.0141, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 7.163120567375887, |
|
"grad_norm": 0.3085339069366455, |
|
"learning_rate": 1.0924116444548383e-06, |
|
"loss": 0.0101, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 7.170212765957447, |
|
"grad_norm": 0.12679530680179596, |
|
"learning_rate": 1.0467890576163707e-06, |
|
"loss": 0.0111, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 7.177304964539007, |
|
"grad_norm": 0.23530429601669312, |
|
"learning_rate": 1.0021345610909171e-06, |
|
"loss": 0.0136, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 7.184397163120567, |
|
"grad_norm": 0.06431034952402115, |
|
"learning_rate": 9.584485917474185e-07, |
|
"loss": 0.0089, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 7.191489361702128, |
|
"grad_norm": 0.15055204927921295, |
|
"learning_rate": 9.157315769794284e-07, |
|
"loss": 0.009, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 7.198581560283688, |
|
"grad_norm": 0.1731652468442917, |
|
"learning_rate": 8.739839347009171e-07, |
|
"loss": 0.0135, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 7.205673758865248, |
|
"grad_norm": 0.1978446990251541, |
|
"learning_rate": 8.332060733421631e-07, |
|
"loss": 0.0116, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 7.212765957446808, |
|
"grad_norm": 0.29635027050971985, |
|
"learning_rate": 7.933983918457677e-07, |
|
"loss": 0.0117, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 7.219858156028369, |
|
"grad_norm": 0.23973168432712555, |
|
"learning_rate": 7.54561279662791e-07, |
|
"loss": 0.0146, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 7.226950354609929, |
|
"grad_norm": 0.12211582064628601, |
|
"learning_rate": 7.166951167488667e-07, |
|
"loss": 0.0076, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 7.23404255319149, |
|
"grad_norm": 0.1157701313495636, |
|
"learning_rate": 6.798002735605602e-07, |
|
"loss": 0.0166, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 7.24113475177305, |
|
"grad_norm": 0.08518693596124649, |
|
"learning_rate": 6.43877111051705e-07, |
|
"loss": 0.0116, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 7.24822695035461, |
|
"grad_norm": 0.3765665888786316, |
|
"learning_rate": 6.089259806698611e-07, |
|
"loss": 0.0174, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 7.25531914893617, |
|
"grad_norm": 0.315969854593277, |
|
"learning_rate": 5.749472243529064e-07, |
|
"loss": 0.0179, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 7.26241134751773, |
|
"grad_norm": 0.1261627972126007, |
|
"learning_rate": 5.419411745256841e-07, |
|
"loss": 0.0145, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 7.2695035460992905, |
|
"grad_norm": 0.14078722894191742, |
|
"learning_rate": 5.099081540967277e-07, |
|
"loss": 0.0082, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 7.276595744680851, |
|
"grad_norm": 0.19919142127037048, |
|
"learning_rate": 4.788484764551293e-07, |
|
"loss": 0.0096, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 7.283687943262412, |
|
"grad_norm": 0.1621677577495575, |
|
"learning_rate": 4.487624454674544e-07, |
|
"loss": 0.0101, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 7.290780141843972, |
|
"grad_norm": 0.07020825147628784, |
|
"learning_rate": 4.196503554747988e-07, |
|
"loss": 0.0081, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 7.297872340425532, |
|
"grad_norm": 0.4086223244667053, |
|
"learning_rate": 3.9151249128988043e-07, |
|
"loss": 0.0108, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 7.304964539007092, |
|
"grad_norm": 0.19677507877349854, |
|
"learning_rate": 3.643491281942302e-07, |
|
"loss": 0.0088, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 7.3120567375886525, |
|
"grad_norm": 0.18708615005016327, |
|
"learning_rate": 3.3816053193556073e-07, |
|
"loss": 0.0137, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 7.319148936170213, |
|
"grad_norm": 0.21218359470367432, |
|
"learning_rate": 3.129469587251466e-07, |
|
"loss": 0.0122, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 7.326241134751773, |
|
"grad_norm": 0.057501133531332016, |
|
"learning_rate": 2.8870865523525915e-07, |
|
"loss": 0.0073, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 7.333333333333333, |
|
"grad_norm": 0.09904036670923233, |
|
"learning_rate": 2.6544585859683556e-07, |
|
"loss": 0.0098, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 7.340425531914893, |
|
"grad_norm": 0.2506350874900818, |
|
"learning_rate": 2.431587963971138e-07, |
|
"loss": 0.0082, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 7.347517730496454, |
|
"grad_norm": 0.16516119241714478, |
|
"learning_rate": 2.218476866774344e-07, |
|
"loss": 0.0116, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 7.3546099290780145, |
|
"grad_norm": 0.18242092430591583, |
|
"learning_rate": 2.015127379310422e-07, |
|
"loss": 0.0145, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 7.361702127659575, |
|
"grad_norm": 0.1638125777244568, |
|
"learning_rate": 1.821541491011547e-07, |
|
"loss": 0.0089, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 7.368794326241135, |
|
"grad_norm": 0.192805677652359, |
|
"learning_rate": 1.6377210957888579e-07, |
|
"loss": 0.0138, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 7.375886524822695, |
|
"grad_norm": 0.16740106046199799, |
|
"learning_rate": 1.4636679920152496e-07, |
|
"loss": 0.0127, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 7.382978723404255, |
|
"grad_norm": 0.20716732740402222, |
|
"learning_rate": 1.2993838825066107e-07, |
|
"loss": 0.0104, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 7.390070921985815, |
|
"grad_norm": 0.2054859846830368, |
|
"learning_rate": 1.1448703745061684e-07, |
|
"loss": 0.0121, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 7.397163120567376, |
|
"grad_norm": 0.19400155544281006, |
|
"learning_rate": 1.0001289796678359e-07, |
|
"loss": 0.0158, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 7.404255319148936, |
|
"grad_norm": 0.257386714220047, |
|
"learning_rate": 8.651611140423344e-08, |
|
"loss": 0.0125, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 7.411347517730497, |
|
"grad_norm": 0.1650981456041336, |
|
"learning_rate": 7.399680980624268e-08, |
|
"loss": 0.0131, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 7.418439716312057, |
|
"grad_norm": 0.07822942733764648, |
|
"learning_rate": 6.24551156530817e-08, |
|
"loss": 0.0107, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 7.425531914893617, |
|
"grad_norm": 0.2906341850757599, |
|
"learning_rate": 5.1891141860760387e-08, |
|
"loss": 0.0074, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 7.432624113475177, |
|
"grad_norm": 0.10687454789876938, |
|
"learning_rate": 4.230499177994007e-08, |
|
"loss": 0.0099, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 7.439716312056738, |
|
"grad_norm": 0.22731392085552216, |
|
"learning_rate": 3.369675919495663e-08, |
|
"loss": 0.012, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 7.446808510638298, |
|
"grad_norm": 0.0800376906991005, |
|
"learning_rate": 2.6066528322832294e-08, |
|
"loss": 0.0108, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 7.453900709219858, |
|
"grad_norm": 0.26024994254112244, |
|
"learning_rate": 1.9414373812509655e-08, |
|
"loss": 0.0159, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 7.460992907801418, |
|
"grad_norm": 0.14578887820243835, |
|
"learning_rate": 1.3740360744118886e-08, |
|
"loss": 0.0163, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 7.468085106382979, |
|
"grad_norm": 0.12513095140457153, |
|
"learning_rate": 9.04454462830051e-09, |
|
"loss": 0.0114, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 7.475177304964539, |
|
"grad_norm": 0.20727184414863586, |
|
"learning_rate": 5.326971405694714e-09, |
|
"loss": 0.0126, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 7.4822695035460995, |
|
"grad_norm": 0.22372283041477203, |
|
"learning_rate": 2.5876774464972387e-09, |
|
"loss": 0.0101, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 7.48936170212766, |
|
"grad_norm": 0.26353222131729126, |
|
"learning_rate": 8.266895500708138e-10, |
|
"loss": 0.011, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 7.49645390070922, |
|
"grad_norm": 0.1274196356534958, |
|
"learning_rate": 4.402494471200669e-11, |
|
"loss": 0.0129, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 7.498581560283688, |
|
"step": 10573, |
|
"total_flos": 3.707114868479735e+17, |
|
"train_loss": 0.02894638727507956, |
|
"train_runtime": 4663.694, |
|
"train_samples_per_second": 36.273, |
|
"train_steps_per_second": 2.267 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 10573, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 8, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.707114868479735e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|