{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 100.0,
"eval_steps": 900,
"global_step": 22500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.10222222222222223,
"grad_norm": 6382.4189453125,
"learning_rate": 3.4074074074074077e-06,
"loss": 306.5843,
"step": 23
},
{
"epoch": 0.20444444444444446,
"grad_norm": 154.1435546875,
"learning_rate": 6.814814814814815e-06,
"loss": 62.983,
"step": 46
},
{
"epoch": 0.30666666666666664,
"grad_norm": 10.364509582519531,
"learning_rate": 1.0222222222222223e-05,
"loss": 7.6227,
"step": 69
},
{
"epoch": 0.4088888888888889,
"grad_norm": 13.348139762878418,
"learning_rate": 1.362962962962963e-05,
"loss": 7.322,
"step": 92
},
{
"epoch": 0.5111111111111111,
"grad_norm": 1.03432297706604,
"learning_rate": 1.7037037037037038e-05,
"loss": 7.2767,
"step": 115
},
{
"epoch": 0.6133333333333333,
"grad_norm": 2.311262369155884,
"learning_rate": 2.0444444444444446e-05,
"loss": 7.2605,
"step": 138
},
{
"epoch": 0.7155555555555555,
"grad_norm": 1.2174512147903442,
"learning_rate": 2.3851851851851854e-05,
"loss": 7.2589,
"step": 161
},
{
"epoch": 0.8177777777777778,
"grad_norm": 1.1917160749435425,
"learning_rate": 2.725925925925926e-05,
"loss": 7.2573,
"step": 184
},
{
"epoch": 0.92,
"grad_norm": 0.802689254283905,
"learning_rate": 3.066666666666667e-05,
"loss": 7.2555,
"step": 207
},
{
"epoch": 1.0222222222222221,
"grad_norm": 0.8915572762489319,
"learning_rate": 3.4074074074074077e-05,
"loss": 7.2539,
"step": 230
},
{
"epoch": 1.1244444444444444,
"grad_norm": 1.1943933963775635,
"learning_rate": 3.7481481481481484e-05,
"loss": 7.2509,
"step": 253
},
{
"epoch": 1.2266666666666666,
"grad_norm": 0.9069448709487915,
"learning_rate": 4.088888888888889e-05,
"loss": 7.2492,
"step": 276
},
{
"epoch": 1.3288888888888888,
"grad_norm": 0.7575523853302002,
"learning_rate": 4.42962962962963e-05,
"loss": 7.2472,
"step": 299
},
{
"epoch": 1.431111111111111,
"grad_norm": 0.5182924866676331,
"learning_rate": 4.770370370370371e-05,
"loss": 7.2453,
"step": 322
},
{
"epoch": 1.5333333333333332,
"grad_norm": 0.5943706631660461,
"learning_rate": 5.111111111111111e-05,
"loss": 7.2433,
"step": 345
},
{
"epoch": 1.6355555555555554,
"grad_norm": 0.5987505912780762,
"learning_rate": 5.451851851851852e-05,
"loss": 7.2415,
"step": 368
},
{
"epoch": 1.7377777777777776,
"grad_norm": 0.8691719770431519,
"learning_rate": 5.792592592592593e-05,
"loss": 7.2388,
"step": 391
},
{
"epoch": 1.8399999999999999,
"grad_norm": 0.9830289483070374,
"learning_rate": 6.133333333333334e-05,
"loss": 7.2337,
"step": 414
},
{
"epoch": 1.942222222222222,
"grad_norm": 0.9140754342079163,
"learning_rate": 6.474074074074075e-05,
"loss": 7.2245,
"step": 437
},
{
"epoch": 2.0444444444444443,
"grad_norm": 1.7775914669036865,
"learning_rate": 6.814814814814815e-05,
"loss": 7.2109,
"step": 460
},
{
"epoch": 2.1466666666666665,
"grad_norm": 1.317034363746643,
"learning_rate": 7.155555555555555e-05,
"loss": 7.1871,
"step": 483
},
{
"epoch": 2.2488888888888887,
"grad_norm": 1.6885732412338257,
"learning_rate": 7.496296296296297e-05,
"loss": 7.1349,
"step": 506
},
{
"epoch": 2.351111111111111,
"grad_norm": 2.464526653289795,
"learning_rate": 7.837037037037037e-05,
"loss": 7.0371,
"step": 529
},
{
"epoch": 2.453333333333333,
"grad_norm": 3.4147374629974365,
"learning_rate": 8.177777777777778e-05,
"loss": 6.9365,
"step": 552
},
{
"epoch": 2.5555555555555554,
"grad_norm": 3.6264212131500244,
"learning_rate": 8.518518518518518e-05,
"loss": 6.8548,
"step": 575
},
{
"epoch": 2.6577777777777776,
"grad_norm": 2.2979955673217773,
"learning_rate": 8.85925925925926e-05,
"loss": 6.7637,
"step": 598
},
{
"epoch": 2.76,
"grad_norm": 2.969346046447754,
"learning_rate": 9.200000000000001e-05,
"loss": 6.6778,
"step": 621
},
{
"epoch": 2.862222222222222,
"grad_norm": 4.26610803604126,
"learning_rate": 9.540740740740741e-05,
"loss": 6.5954,
"step": 644
},
{
"epoch": 2.964444444444444,
"grad_norm": 1.7684084177017212,
"learning_rate": 9.881481481481482e-05,
"loss": 6.5164,
"step": 667
},
{
"epoch": 3.066666666666667,
"grad_norm": 3.720853090286255,
"learning_rate": 9.999988344964554e-05,
"loss": 6.4356,
"step": 690
},
{
"epoch": 3.168888888888889,
"grad_norm": 2.5611510276794434,
"learning_rate": 9.99992520072995e-05,
"loss": 6.3594,
"step": 713
},
{
"epoch": 3.2711111111111113,
"grad_norm": 5.3843255043029785,
"learning_rate": 9.999807252777301e-05,
"loss": 6.3057,
"step": 736
},
{
"epoch": 3.3733333333333335,
"grad_norm": 4.412026882171631,
"learning_rate": 9.999634502399426e-05,
"loss": 6.25,
"step": 759
},
{
"epoch": 3.4755555555555557,
"grad_norm": 3.188660144805908,
"learning_rate": 9.999406951489825e-05,
"loss": 6.1975,
"step": 782
},
{
"epoch": 3.5777777777777775,
"grad_norm": 4.5765156745910645,
"learning_rate": 9.999124602542662e-05,
"loss": 6.1516,
"step": 805
},
{
"epoch": 3.68,
"grad_norm": 5.967836856842041,
"learning_rate": 9.998787458652739e-05,
"loss": 6.1038,
"step": 828
},
{
"epoch": 3.7822222222222224,
"grad_norm": 6.038416385650635,
"learning_rate": 9.998395523515457e-05,
"loss": 6.078,
"step": 851
},
{
"epoch": 3.8844444444444446,
"grad_norm": 2.577953577041626,
"learning_rate": 9.997948801426783e-05,
"loss": 6.0297,
"step": 874
},
{
"epoch": 3.986666666666667,
"grad_norm": 3.8739564418792725,
"learning_rate": 9.997447297283196e-05,
"loss": 5.9847,
"step": 897
},
{
"epoch": 4.088888888888889,
"grad_norm": 5.759775161743164,
"learning_rate": 9.996891016581633e-05,
"loss": 5.9452,
"step": 920
},
{
"epoch": 4.191111111111111,
"grad_norm": 5.758726596832275,
"learning_rate": 9.996279965419441e-05,
"loss": 5.9283,
"step": 943
},
{
"epoch": 4.293333333333333,
"grad_norm": 3.345691204071045,
"learning_rate": 9.995614150494293e-05,
"loss": 5.8792,
"step": 966
},
{
"epoch": 4.395555555555555,
"grad_norm": 5.426297664642334,
"learning_rate": 9.994893579104123e-05,
"loss": 5.8526,
"step": 989
},
{
"epoch": 4.497777777777777,
"grad_norm": 4.649121284484863,
"learning_rate": 9.994118259147049e-05,
"loss": 5.8266,
"step": 1012
},
{
"epoch": 4.6,
"grad_norm": 5.175451278686523,
"learning_rate": 9.993288199121283e-05,
"loss": 5.8114,
"step": 1035
},
{
"epoch": 4.702222222222222,
"grad_norm": 4.655645370483398,
"learning_rate": 9.992403408125033e-05,
"loss": 5.7801,
"step": 1058
},
{
"epoch": 4.804444444444444,
"grad_norm": 5.830355644226074,
"learning_rate": 9.991463895856414e-05,
"loss": 5.7576,
"step": 1081
},
{
"epoch": 4.906666666666666,
"grad_norm": 3.2799057960510254,
"learning_rate": 9.990469672613331e-05,
"loss": 5.7327,
"step": 1104
},
{
"epoch": 5.0088888888888885,
"grad_norm": 5.891563415527344,
"learning_rate": 9.989420749293375e-05,
"loss": 5.7139,
"step": 1127
},
{
"epoch": 5.111111111111111,
"grad_norm": 6.125003337860107,
"learning_rate": 9.988317137393697e-05,
"loss": 5.6823,
"step": 1150
},
{
"epoch": 5.213333333333333,
"grad_norm": 4.9209442138671875,
"learning_rate": 9.987158849010885e-05,
"loss": 5.6534,
"step": 1173
},
{
"epoch": 5.315555555555555,
"grad_norm": 3.9249610900878906,
"learning_rate": 9.985945896840829e-05,
"loss": 5.6601,
"step": 1196
},
{
"epoch": 5.417777777777777,
"grad_norm": 7.975271701812744,
"learning_rate": 9.984678294178589e-05,
"loss": 5.6278,
"step": 1219
},
{
"epoch": 5.52,
"grad_norm": 4.072458267211914,
"learning_rate": 9.983356054918238e-05,
"loss": 5.6104,
"step": 1242
},
{
"epoch": 5.622222222222222,
"grad_norm": 5.122928142547607,
"learning_rate": 9.981979193552721e-05,
"loss": 5.5991,
"step": 1265
},
{
"epoch": 5.724444444444444,
"grad_norm": 6.029202461242676,
"learning_rate": 9.980547725173685e-05,
"loss": 5.5761,
"step": 1288
},
{
"epoch": 5.826666666666666,
"grad_norm": 4.795958042144775,
"learning_rate": 9.979061665471326e-05,
"loss": 5.5573,
"step": 1311
},
{
"epoch": 5.928888888888888,
"grad_norm": 3.8007431030273438,
"learning_rate": 9.977521030734203e-05,
"loss": 5.5274,
"step": 1334
},
{
"epoch": 6.0311111111111115,
"grad_norm": 5.163888931274414,
"learning_rate": 9.975925837849073e-05,
"loss": 5.5212,
"step": 1357
},
{
"epoch": 6.133333333333334,
"grad_norm": 5.857538223266602,
"learning_rate": 9.9742761043007e-05,
"loss": 5.5039,
"step": 1380
},
{
"epoch": 6.235555555555556,
"grad_norm": 4.817676067352295,
"learning_rate": 9.972571848171657e-05,
"loss": 5.4863,
"step": 1403
},
{
"epoch": 6.337777777777778,
"grad_norm": 4.5216450691223145,
"learning_rate": 9.97081308814214e-05,
"loss": 5.4866,
"step": 1426
},
{
"epoch": 6.44,
"grad_norm": 5.7964630126953125,
"learning_rate": 9.968999843489754e-05,
"loss": 5.4544,
"step": 1449
},
{
"epoch": 6.542222222222223,
"grad_norm": 7.403745174407959,
"learning_rate": 9.967132134089309e-05,
"loss": 5.4383,
"step": 1472
},
{
"epoch": 6.644444444444445,
"grad_norm": 5.906863689422607,
"learning_rate": 9.965209980412593e-05,
"loss": 5.4435,
"step": 1495
},
{
"epoch": 6.746666666666667,
"grad_norm": 4.985208511352539,
"learning_rate": 9.963233403528154e-05,
"loss": 5.4271,
"step": 1518
},
{
"epoch": 6.848888888888889,
"grad_norm": 5.670632839202881,
"learning_rate": 9.96120242510107e-05,
"loss": 5.4023,
"step": 1541
},
{
"epoch": 6.9511111111111115,
"grad_norm": 4.155480861663818,
"learning_rate": 9.959117067392709e-05,
"loss": 5.3781,
"step": 1564
},
{
"epoch": 7.053333333333334,
"grad_norm": 6.202167987823486,
"learning_rate": 9.95697735326048e-05,
"loss": 5.3696,
"step": 1587
},
{
"epoch": 7.155555555555556,
"grad_norm": 5.649682998657227,
"learning_rate": 9.954783306157595e-05,
"loss": 5.3255,
"step": 1610
},
{
"epoch": 7.257777777777778,
"grad_norm": 6.699223518371582,
"learning_rate": 9.952534950132802e-05,
"loss": 5.3186,
"step": 1633
},
{
"epoch": 7.36,
"grad_norm": 5.433987140655518,
"learning_rate": 9.95023230983012e-05,
"loss": 5.3147,
"step": 1656
},
{
"epoch": 7.4622222222222225,
"grad_norm": 4.822690010070801,
"learning_rate": 9.947875410488581e-05,
"loss": 5.3022,
"step": 1679
},
{
"epoch": 7.564444444444445,
"grad_norm": 5.345188617706299,
"learning_rate": 9.945464277941939e-05,
"loss": 5.2828,
"step": 1702
},
{
"epoch": 7.666666666666667,
"grad_norm": 4.902531623840332,
"learning_rate": 9.942998938618394e-05,
"loss": 5.2818,
"step": 1725
},
{
"epoch": 7.768888888888889,
"grad_norm": 7.8368353843688965,
"learning_rate": 9.940479419540304e-05,
"loss": 5.2735,
"step": 1748
},
{
"epoch": 7.871111111111111,
"grad_norm": 5.669989585876465,
"learning_rate": 9.937905748323883e-05,
"loss": 5.2554,
"step": 1771
},
{
"epoch": 7.973333333333334,
"grad_norm": 4.463327407836914,
"learning_rate": 9.935277953178905e-05,
"loss": 5.2421,
"step": 1794
},
{
"epoch": 8.075555555555555,
"grad_norm": 4.2700629234313965,
"learning_rate": 9.93259606290839e-05,
"loss": 5.1956,
"step": 1817
},
{
"epoch": 8.177777777777777,
"grad_norm": 5.543842315673828,
"learning_rate": 9.929860106908289e-05,
"loss": 5.1719,
"step": 1840
},
{
"epoch": 8.28,
"grad_norm": 10.465546607971191,
"learning_rate": 9.927070115167161e-05,
"loss": 5.1691,
"step": 1863
},
{
"epoch": 8.382222222222222,
"grad_norm": 5.517487525939941,
"learning_rate": 9.924226118265849e-05,
"loss": 5.1431,
"step": 1886
},
{
"epoch": 8.484444444444444,
"grad_norm": 6.022068977355957,
"learning_rate": 9.921328147377143e-05,
"loss": 5.1507,
"step": 1909
},
{
"epoch": 8.586666666666666,
"grad_norm": 4.770472526550293,
"learning_rate": 9.918376234265428e-05,
"loss": 5.1385,
"step": 1932
},
{
"epoch": 8.688888888888888,
"grad_norm": 6.177302360534668,
"learning_rate": 9.915370411286356e-05,
"loss": 5.1091,
"step": 1955
},
{
"epoch": 8.79111111111111,
"grad_norm": 6.306371688842773,
"learning_rate": 9.912310711386473e-05,
"loss": 5.1276,
"step": 1978
},
{
"epoch": 8.893333333333333,
"grad_norm": 7.086174488067627,
"learning_rate": 9.909197168102867e-05,
"loss": 5.0997,
"step": 2001
},
{
"epoch": 8.995555555555555,
"grad_norm": 5.590447902679443,
"learning_rate": 9.906029815562797e-05,
"loss": 5.0776,
"step": 2024
},
{
"epoch": 9.097777777777777,
"grad_norm": 4.786597728729248,
"learning_rate": 9.902808688483323e-05,
"loss": 5.0244,
"step": 2047
},
{
"epoch": 9.2,
"grad_norm": 7.7961015701293945,
"learning_rate": 9.899533822170922e-05,
"loss": 5.0232,
"step": 2070
},
{
"epoch": 9.302222222222222,
"grad_norm": 5.857214450836182,
"learning_rate": 9.896205252521099e-05,
"loss": 5.0213,
"step": 2093
},
{
"epoch": 9.404444444444444,
"grad_norm": 6.194970607757568,
"learning_rate": 9.892823016017999e-05,
"loss": 4.984,
"step": 2116
},
{
"epoch": 9.506666666666666,
"grad_norm": 7.040445804595947,
"learning_rate": 9.889387149734004e-05,
"loss": 4.9845,
"step": 2139
},
{
"epoch": 9.608888888888888,
"grad_norm": 6.245872497558594,
"learning_rate": 9.885897691329327e-05,
"loss": 4.9771,
"step": 2162
},
{
"epoch": 9.71111111111111,
"grad_norm": 4.590968608856201,
"learning_rate": 9.882354679051598e-05,
"loss": 4.9565,
"step": 2185
},
{
"epoch": 9.813333333333333,
"grad_norm": 5.94847297668457,
"learning_rate": 9.87875815173545e-05,
"loss": 4.9531,
"step": 2208
},
{
"epoch": 9.915555555555555,
"grad_norm": 8.10450267791748,
"learning_rate": 9.875108148802082e-05,
"loss": 4.9557,
"step": 2231
},
{
"epoch": 10.017777777777777,
"grad_norm": 5.512363910675049,
"learning_rate": 9.871404710258841e-05,
"loss": 4.9295,
"step": 2254
},
{
"epoch": 10.12,
"grad_norm": 5.455718517303467,
"learning_rate": 9.867647876698775e-05,
"loss": 4.8753,
"step": 2277
},
{
"epoch": 10.222222222222221,
"grad_norm": 5.959130764007568,
"learning_rate": 9.86383768930019e-05,
"loss": 4.8732,
"step": 2300
},
{
"epoch": 10.324444444444444,
"grad_norm": 6.239514350891113,
"learning_rate": 9.859974189826198e-05,
"loss": 4.8707,
"step": 2323
},
{
"epoch": 10.426666666666666,
"grad_norm": 7.127731800079346,
"learning_rate": 9.856057420624259e-05,
"loss": 4.846,
"step": 2346
},
{
"epoch": 10.528888888888888,
"grad_norm": 6.327420234680176,
"learning_rate": 9.852087424625717e-05,
"loss": 4.8457,
"step": 2369
},
{
"epoch": 10.63111111111111,
"grad_norm": 6.398340225219727,
"learning_rate": 9.848064245345333e-05,
"loss": 4.8295,
"step": 2392
},
{
"epoch": 10.733333333333333,
"grad_norm": 5.890859603881836,
"learning_rate": 9.843987926880803e-05,
"loss": 4.8091,
"step": 2415
},
{
"epoch": 10.835555555555555,
"grad_norm": 7.191392421722412,
"learning_rate": 9.839858513912276e-05,
"loss": 4.8022,
"step": 2438
},
{
"epoch": 10.937777777777779,
"grad_norm": 6.238222122192383,
"learning_rate": 9.835676051701867e-05,
"loss": 4.7898,
"step": 2461
},
{
"epoch": 11.04,
"grad_norm": 6.7246551513671875,
"learning_rate": 9.831440586093157e-05,
"loss": 4.7692,
"step": 2484
},
{
"epoch": 11.142222222222221,
"grad_norm": 5.07949161529541,
"learning_rate": 9.827152163510693e-05,
"loss": 4.7251,
"step": 2507
},
{
"epoch": 11.244444444444444,
"grad_norm": 7.340390682220459,
"learning_rate": 9.82281083095948e-05,
"loss": 4.7188,
"step": 2530
},
{
"epoch": 11.346666666666668,
"grad_norm": 5.695153713226318,
"learning_rate": 9.818416636024461e-05,
"loss": 4.7111,
"step": 2553
},
{
"epoch": 11.448888888888888,
"grad_norm": 5.70296573638916,
"learning_rate": 9.813969626870002e-05,
"loss": 4.7043,
"step": 2576
},
{
"epoch": 11.551111111111112,
"grad_norm": 5.775058269500732,
"learning_rate": 9.809469852239359e-05,
"loss": 4.6924,
"step": 2599
},
{
"epoch": 11.653333333333332,
"grad_norm": 7.319630146026611,
"learning_rate": 9.804917361454145e-05,
"loss": 4.6848,
"step": 2622
},
{
"epoch": 11.755555555555556,
"grad_norm": 7.945709705352783,
"learning_rate": 9.800312204413793e-05,
"loss": 4.6667,
"step": 2645
},
{
"epoch": 11.857777777777777,
"grad_norm": 7.591863632202148,
"learning_rate": 9.795654431595e-05,
"loss": 4.6778,
"step": 2668
},
{
"epoch": 11.96,
"grad_norm": 6.433276653289795,
"learning_rate": 9.790944094051187e-05,
"loss": 4.6699,
"step": 2691
},
{
"epoch": 12.062222222222223,
"grad_norm": 6.956933975219727,
"learning_rate": 9.786181243411926e-05,
"loss": 4.6113,
"step": 2714
},
{
"epoch": 12.164444444444445,
"grad_norm": 5.551136016845703,
"learning_rate": 9.781365931882387e-05,
"loss": 4.582,
"step": 2737
},
{
"epoch": 12.266666666666667,
"grad_norm": 7.214599609375,
"learning_rate": 9.776498212242749e-05,
"loss": 4.5932,
"step": 2760
},
{
"epoch": 12.36888888888889,
"grad_norm": 6.5685715675354,
"learning_rate": 9.771578137847639e-05,
"loss": 4.5896,
"step": 2783
},
{
"epoch": 12.471111111111112,
"grad_norm": 8.017729759216309,
"learning_rate": 9.766605762625541e-05,
"loss": 4.5579,
"step": 2806
},
{
"epoch": 12.573333333333334,
"grad_norm": 6.021265983581543,
"learning_rate": 9.761581141078194e-05,
"loss": 4.5715,
"step": 2829
},
{
"epoch": 12.675555555555556,
"grad_norm": 7.427931785583496,
"learning_rate": 9.756504328280016e-05,
"loss": 4.5681,
"step": 2852
},
{
"epoch": 12.777777777777779,
"grad_norm": 6.325420379638672,
"learning_rate": 9.751375379877481e-05,
"loss": 4.5695,
"step": 2875
},
{
"epoch": 12.88,
"grad_norm": 4.837381839752197,
"learning_rate": 9.746194352088518e-05,
"loss": 4.5321,
"step": 2898
},
{
"epoch": 12.982222222222223,
"grad_norm": 6.933470726013184,
"learning_rate": 9.740961301701894e-05,
"loss": 4.5286,
"step": 2921
},
{
"epoch": 13.084444444444445,
"grad_norm": 5.810832977294922,
"learning_rate": 9.73567628607659e-05,
"loss": 4.463,
"step": 2944
},
{
"epoch": 13.186666666666667,
"grad_norm": 7.62490177154541,
"learning_rate": 9.730339363141175e-05,
"loss": 4.462,
"step": 2967
},
{
"epoch": 13.28888888888889,
"grad_norm": 6.67575216293335,
"learning_rate": 9.72495059139317e-05,
"loss": 4.4402,
"step": 2990
},
{
"epoch": 13.391111111111112,
"grad_norm": 6.110825538635254,
"learning_rate": 9.719510029898398e-05,
"loss": 4.443,
"step": 3013
},
{
"epoch": 13.493333333333334,
"grad_norm": 7.317692279815674,
"learning_rate": 9.714017738290358e-05,
"loss": 4.4456,
"step": 3036
},
{
"epoch": 13.595555555555556,
"grad_norm": 6.189058303833008,
"learning_rate": 9.708473776769544e-05,
"loss": 4.4524,
"step": 3059
},
{
"epoch": 13.697777777777778,
"grad_norm": 5.6017632484436035,
"learning_rate": 9.702878206102811e-05,
"loss": 4.4234,
"step": 3082
},
{
"epoch": 13.8,
"grad_norm": 5.7952189445495605,
"learning_rate": 9.697231087622691e-05,
"loss": 4.4016,
"step": 3105
},
{
"epoch": 13.902222222222223,
"grad_norm": 5.7486677169799805,
"learning_rate": 9.691532483226723e-05,
"loss": 4.4106,
"step": 3128
},
{
"epoch": 14.004444444444445,
"grad_norm": 6.603976249694824,
"learning_rate": 9.68578245537679e-05,
"loss": 4.367,
"step": 3151
},
{
"epoch": 14.106666666666667,
"grad_norm": 6.593631744384766,
"learning_rate": 9.679981067098414e-05,
"loss": 4.3122,
"step": 3174
},
{
"epoch": 14.20888888888889,
"grad_norm": 6.519464015960693,
"learning_rate": 9.674128381980072e-05,
"loss": 4.3038,
"step": 3197
},
{
"epoch": 14.311111111111112,
"grad_norm": 7.691238880157471,
"learning_rate": 9.668224464172508e-05,
"loss": 4.3305,
"step": 3220
},
{
"epoch": 14.413333333333334,
"grad_norm": 5.136379718780518,
"learning_rate": 9.66226937838802e-05,
"loss": 4.3137,
"step": 3243
},
{
"epoch": 14.515555555555556,
"grad_norm": 5.727292537689209,
"learning_rate": 9.65626318989975e-05,
"loss": 4.3126,
"step": 3266
},
{
"epoch": 14.617777777777778,
"grad_norm": 7.882863998413086,
"learning_rate": 9.650205964540978e-05,
"loss": 4.2942,
"step": 3289
},
{
"epoch": 14.72,
"grad_norm": 7.945621013641357,
"learning_rate": 9.64409776870439e-05,
"loss": 4.3076,
"step": 3312
},
{
"epoch": 14.822222222222223,
"grad_norm": 6.543049335479736,
"learning_rate": 9.637938669341356e-05,
"loss": 4.2815,
"step": 3335
},
{
"epoch": 14.924444444444445,
"grad_norm": 5.685489654541016,
"learning_rate": 9.631728733961194e-05,
"loss": 4.2873,
"step": 3358
},
{
"epoch": 15.026666666666667,
"grad_norm": 5.528294563293457,
"learning_rate": 9.625468030630432e-05,
"loss": 4.2617,
"step": 3381
},
{
"epoch": 15.12888888888889,
"grad_norm": 7.666279315948486,
"learning_rate": 9.619156627972064e-05,
"loss": 4.2157,
"step": 3404
},
{
"epoch": 15.231111111111112,
"grad_norm": 6.189380645751953,
"learning_rate": 9.612794595164786e-05,
"loss": 4.207,
"step": 3427
},
{
"epoch": 15.333333333333334,
"grad_norm": 6.782273292541504,
"learning_rate": 9.606382001942255e-05,
"loss": 4.1977,
"step": 3450
},
{
"epoch": 15.435555555555556,
"grad_norm": 6.819105625152588,
"learning_rate": 9.599918918592313e-05,
"loss": 4.2046,
"step": 3473
},
{
"epoch": 15.537777777777778,
"grad_norm": 6.558395862579346,
"learning_rate": 9.593405415956216e-05,
"loss": 4.1959,
"step": 3496
},
{
"epoch": 15.64,
"grad_norm": 7.579700946807861,
"learning_rate": 9.58684156542787e-05,
"loss": 4.2004,
"step": 3519
},
{
"epoch": 15.742222222222223,
"grad_norm": 5.998022556304932,
"learning_rate": 9.580227438953028e-05,
"loss": 4.1972,
"step": 3542
},
{
"epoch": 15.844444444444445,
"grad_norm": 8.631059646606445,
"learning_rate": 9.573563109028523e-05,
"loss": 4.1674,
"step": 3565
},
{
"epoch": 15.946666666666667,
"grad_norm": 6.702101230621338,
"learning_rate": 9.566848648701457e-05,
"loss": 4.1303,
"step": 3588
},
{
"epoch": 16.04888888888889,
"grad_norm": 7.247947692871094,
"learning_rate": 9.56008413156841e-05,
"loss": 4.0834,
"step": 3611
},
{
"epoch": 16.15111111111111,
"grad_norm": 6.5919575691223145,
"learning_rate": 9.553269631774631e-05,
"loss": 4.0488,
"step": 3634
},
{
"epoch": 16.253333333333334,
"grad_norm": 8.66784381866455,
"learning_rate": 9.54640522401322e-05,
"loss": 4.0754,
"step": 3657
},
{
"epoch": 16.355555555555554,
"grad_norm": 7.605900764465332,
"learning_rate": 9.539490983524316e-05,
"loss": 4.0721,
"step": 3680
},
{
"epoch": 16.45777777777778,
"grad_norm": 7.925562381744385,
"learning_rate": 9.532526986094273e-05,
"loss": 4.0685,
"step": 3703
},
{
"epoch": 16.56,
"grad_norm": 7.180625915527344,
"learning_rate": 9.525513308054819e-05,
"loss": 4.0581,
"step": 3726
},
{
"epoch": 16.662222222222223,
"grad_norm": 6.261662483215332,
"learning_rate": 9.518450026282233e-05,
"loss": 4.0405,
"step": 3749
},
{
"epoch": 16.764444444444443,
"grad_norm": 5.739262580871582,
"learning_rate": 9.511337218196494e-05,
"loss": 4.0315,
"step": 3772
},
{
"epoch": 16.866666666666667,
"grad_norm": 6.229343891143799,
"learning_rate": 9.504174961760435e-05,
"loss": 4.036,
"step": 3795
},
{
"epoch": 16.968888888888888,
"grad_norm": 7.991888046264648,
"learning_rate": 9.496963335478884e-05,
"loss": 4.0707,
"step": 3818
},
{
"epoch": 17.07111111111111,
"grad_norm": 5.881919860839844,
"learning_rate": 9.489702418397814e-05,
"loss": 3.9782,
"step": 3841
},
{
"epoch": 17.173333333333332,
"grad_norm": 5.624960899353027,
"learning_rate": 9.482392290103462e-05,
"loss": 3.9473,
"step": 3864
},
{
"epoch": 17.275555555555556,
"grad_norm": 5.786345481872559,
"learning_rate": 9.475033030721471e-05,
"loss": 3.9561,
"step": 3887
},
{
"epoch": 17.377777777777776,
"grad_norm": 7.602824687957764,
"learning_rate": 9.467624720916002e-05,
"loss": 3.9605,
"step": 3910
},
{
"epoch": 17.48,
"grad_norm": 6.39411735534668,
"learning_rate": 9.460167441888854e-05,
"loss": 3.9324,
"step": 3933
},
{
"epoch": 17.58222222222222,
"grad_norm": 6.903740882873535,
"learning_rate": 9.452661275378576e-05,
"loss": 3.9302,
"step": 3956
},
{
"epoch": 17.684444444444445,
"grad_norm": 7.515189170837402,
"learning_rate": 9.445106303659562e-05,
"loss": 3.911,
"step": 3979
},
{
"epoch": 17.786666666666665,
"grad_norm": 6.514119625091553,
"learning_rate": 9.43750260954116e-05,
"loss": 3.9168,
"step": 4002
},
{
"epoch": 17.88888888888889,
"grad_norm": 5.5810370445251465,
"learning_rate": 9.429850276366758e-05,
"loss": 3.9236,
"step": 4025
},
{
"epoch": 17.99111111111111,
"grad_norm": 6.529542446136475,
"learning_rate": 9.422149388012875e-05,
"loss": 3.9076,
"step": 4048
},
{
"epoch": 18.093333333333334,
"grad_norm": 5.167507171630859,
"learning_rate": 9.414400028888235e-05,
"loss": 3.8211,
"step": 4071
},
{
"epoch": 18.195555555555554,
"grad_norm": 6.467238426208496,
"learning_rate": 9.406602283932845e-05,
"loss": 3.8423,
"step": 4094
},
{
"epoch": 18.297777777777778,
"grad_norm": 7.490845203399658,
"learning_rate": 9.398756238617071e-05,
"loss": 3.8308,
"step": 4117
},
{
"epoch": 18.4,
"grad_norm": 5.916659832000732,
"learning_rate": 9.390861978940686e-05,
"loss": 3.8273,
"step": 4140
},
{
"epoch": 18.502222222222223,
"grad_norm": 6.601635456085205,
"learning_rate": 9.382919591431945e-05,
"loss": 3.8316,
"step": 4163
},
{
"epoch": 18.604444444444443,
"grad_norm": 7.86677885055542,
"learning_rate": 9.374929163146621e-05,
"loss": 3.8223,
"step": 4186
},
{
"epoch": 18.706666666666667,
"grad_norm": 6.863983154296875,
"learning_rate": 9.36689078166706e-05,
"loss": 3.8244,
"step": 4209
},
{
"epoch": 18.808888888888887,
"grad_norm": 6.3487467765808105,
"learning_rate": 9.35880453510122e-05,
"loss": 3.7945,
"step": 4232
},
{
"epoch": 18.91111111111111,
"grad_norm": 7.521273612976074,
"learning_rate": 9.350670512081702e-05,
"loss": 3.8077,
"step": 4255
},
{
"epoch": 19.013333333333332,
"grad_norm": 7.834831714630127,
"learning_rate": 9.34248880176478e-05,
"loss": 3.7712,
"step": 4278
},
{
"epoch": 19.115555555555556,
"grad_norm": 6.245793342590332,
"learning_rate": 9.334259493829423e-05,
"loss": 3.6992,
"step": 4301
},
{
"epoch": 19.217777777777776,
"grad_norm": 7.780862808227539,
"learning_rate": 9.325982678476317e-05,
"loss": 3.6929,
"step": 4324
},
{
"epoch": 19.32,
"grad_norm": 7.378338813781738,
"learning_rate": 9.317658446426871e-05,
"loss": 3.7204,
"step": 4347
},
{
"epoch": 19.42222222222222,
"grad_norm": 6.953887462615967,
"learning_rate": 9.309286888922219e-05,
"loss": 3.7305,
"step": 4370
},
{
"epoch": 19.524444444444445,
"grad_norm": 6.669604301452637,
"learning_rate": 9.300868097722235e-05,
"loss": 3.7116,
"step": 4393
},
{
"epoch": 19.626666666666665,
"grad_norm": 6.265507221221924,
"learning_rate": 9.292402165104506e-05,
"loss": 3.6961,
"step": 4416
},
{
"epoch": 19.72888888888889,
"grad_norm": 6.823009967803955,
"learning_rate": 9.28388918386334e-05,
"loss": 3.6913,
"step": 4439
},
{
"epoch": 19.83111111111111,
"grad_norm": 6.928945064544678,
"learning_rate": 9.275329247308737e-05,
"loss": 3.7144,
"step": 4462
},
{
"epoch": 19.933333333333334,
"grad_norm": 7.16089391708374,
"learning_rate": 9.26672244926537e-05,
"loss": 3.7168,
"step": 4485
},
{
"epoch": 20.035555555555554,
"grad_norm": 6.4695563316345215,
"learning_rate": 9.258068884071559e-05,
"loss": 3.655,
"step": 4508
},
{
"epoch": 20.137777777777778,
"grad_norm": 6.902811527252197,
"learning_rate": 9.249368646578227e-05,
"loss": 3.5855,
"step": 4531
},
{
"epoch": 20.24,
"grad_norm": 7.336968898773193,
"learning_rate": 9.24062183214788e-05,
"loss": 3.5716,
"step": 4554
},
{
"epoch": 20.342222222222222,
"grad_norm": 6.539813995361328,
"learning_rate": 9.231828536653537e-05,
"loss": 3.6035,
"step": 4577
},
{
"epoch": 20.444444444444443,
"grad_norm": 8.689528465270996,
"learning_rate": 9.222988856477702e-05,
"loss": 3.6179,
"step": 4600
},
{
"epoch": 20.546666666666667,
"grad_norm": 8.209162712097168,
"learning_rate": 9.214102888511287e-05,
"loss": 3.6182,
"step": 4623
},
{
"epoch": 20.648888888888887,
"grad_norm": 7.320056438446045,
"learning_rate": 9.20517073015257e-05,
"loss": 3.5944,
"step": 4646
},
{
"epoch": 20.75111111111111,
"grad_norm": 7.204301357269287,
"learning_rate": 9.196192479306114e-05,
"loss": 3.5922,
"step": 4669
},
{
"epoch": 20.85333333333333,
"grad_norm": 7.85291051864624,
"learning_rate": 9.187168234381692e-05,
"loss": 3.5992,
"step": 4692
},
{
"epoch": 20.955555555555556,
"grad_norm": 6.276856422424316,
"learning_rate": 9.178098094293222e-05,
"loss": 3.5929,
"step": 4715
},
{
"epoch": 21.057777777777776,
"grad_norm": 5.835750102996826,
"learning_rate": 9.168982158457672e-05,
"loss": 3.5289,
"step": 4738
},
{
"epoch": 21.16,
"grad_norm": 6.322780609130859,
"learning_rate": 9.159820526793969e-05,
"loss": 3.4881,
"step": 4761
},
{
"epoch": 21.26222222222222,
"grad_norm": 7.341971397399902,
"learning_rate": 9.150613299721916e-05,
"loss": 3.4799,
"step": 4784
},
{
"epoch": 21.364444444444445,
"grad_norm": 6.387499809265137,
"learning_rate": 9.14136057816107e-05,
"loss": 3.4747,
"step": 4807
},
{
"epoch": 21.466666666666665,
"grad_norm": 7.271056175231934,
"learning_rate": 9.132062463529665e-05,
"loss": 3.4783,
"step": 4830
},
{
"epoch": 21.56888888888889,
"grad_norm": 7.323966026306152,
"learning_rate": 9.122719057743473e-05,
"loss": 3.4756,
"step": 4853
},
{
"epoch": 21.67111111111111,
"grad_norm": 7.535403251647949,
"learning_rate": 9.113330463214699e-05,
"loss": 3.4825,
"step": 4876
},
{
"epoch": 21.773333333333333,
"grad_norm": 5.771243095397949,
"learning_rate": 9.103896782850865e-05,
"loss": 3.4737,
"step": 4899
},
{
"epoch": 21.875555555555554,
"grad_norm": 8.020050048828125,
"learning_rate": 9.094418120053667e-05,
"loss": 3.4709,
"step": 4922
},
{
"epoch": 21.977777777777778,
"grad_norm": 6.315218448638916,
"learning_rate": 9.08489457871785e-05,
"loss": 3.4751,
"step": 4945
},
{
"epoch": 22.08,
"grad_norm": 7.77646017074585,
"learning_rate": 9.075326263230073e-05,
"loss": 3.4052,
"step": 4968
},
{
"epoch": 22.182222222222222,
"grad_norm": 6.549800872802734,
"learning_rate": 9.065713278467755e-05,
"loss": 3.3815,
"step": 4991
},
{
"epoch": 22.284444444444443,
"grad_norm": 6.499227523803711,
"learning_rate": 9.056055729797938e-05,
"loss": 3.3818,
"step": 5014
},
{
"epoch": 22.386666666666667,
"grad_norm": 7.827967643737793,
"learning_rate": 9.046353723076117e-05,
"loss": 3.3781,
"step": 5037
},
{
"epoch": 22.488888888888887,
"grad_norm": 8.61707592010498,
"learning_rate": 9.036607364645094e-05,
"loss": 3.362,
"step": 5060
},
{
"epoch": 22.59111111111111,
"grad_norm": 8.287631034851074,
"learning_rate": 9.026816761333799e-05,
"loss": 3.3951,
"step": 5083
},
{
"epoch": 22.693333333333335,
"grad_norm": 6.027954578399658,
"learning_rate": 9.016982020456133e-05,
"loss": 3.3988,
"step": 5106
},
{
"epoch": 22.795555555555556,
"grad_norm": 5.422713756561279,
"learning_rate": 9.00710324980978e-05,
"loss": 3.3986,
"step": 5129
},
{
"epoch": 22.897777777777776,
"grad_norm": 6.52266788482666,
"learning_rate": 8.997180557675034e-05,
"loss": 3.3685,
"step": 5152
},
{
"epoch": 23.0,
"grad_norm": 8.5319242477417,
"learning_rate": 8.987214052813604e-05,
"loss": 3.3852,
"step": 5175
},
{
"epoch": 23.102222222222224,
"grad_norm": 5.753627300262451,
"learning_rate": 8.977203844467432e-05,
"loss": 3.2861,
"step": 5198
},
{
"epoch": 23.204444444444444,
"grad_norm": 6.238333225250244,
"learning_rate": 8.967150042357484e-05,
"loss": 3.297,
"step": 5221
},
{
"epoch": 23.306666666666665,
"grad_norm": 7.126039505004883,
"learning_rate": 8.957052756682556e-05,
"loss": 3.3114,
"step": 5244
},
{
"epoch": 23.40888888888889,
"grad_norm": 7.4155426025390625,
"learning_rate": 8.946912098118066e-05,
"loss": 3.3054,
"step": 5267
},
{
"epoch": 23.511111111111113,
"grad_norm": 6.702388763427734,
"learning_rate": 8.93672817781483e-05,
"loss": 3.2675,
"step": 5290
},
{
"epoch": 23.613333333333333,
"grad_norm": 7.878185272216797,
"learning_rate": 8.926501107397863e-05,
"loss": 3.2968,
"step": 5313
},
{
"epoch": 23.715555555555554,
"grad_norm": 7.802605152130127,
"learning_rate": 8.916230998965134e-05,
"loss": 3.2743,
"step": 5336
},
{
"epoch": 23.817777777777778,
"grad_norm": 6.080660820007324,
"learning_rate": 8.905917965086356e-05,
"loss": 3.287,
"step": 5359
},
{
"epoch": 23.92,
"grad_norm": 7.292867660522461,
"learning_rate": 8.895562118801738e-05,
"loss": 3.2723,
"step": 5382
},
{
"epoch": 24.022222222222222,
"grad_norm": 6.736908435821533,
"learning_rate": 8.885163573620754e-05,
"loss": 3.2406,
"step": 5405
},
{
"epoch": 24.124444444444446,
"grad_norm": 5.8357343673706055,
"learning_rate": 8.874722443520899e-05,
"loss": 3.1797,
"step": 5428
},
{
"epoch": 24.226666666666667,
"grad_norm": 6.487481117248535,
"learning_rate": 8.864238842946433e-05,
"loss": 3.1876,
"step": 5451
},
{
"epoch": 24.32888888888889,
"grad_norm": 7.854300498962402,
"learning_rate": 8.853712886807132e-05,
"loss": 3.2056,
"step": 5474
},
{
"epoch": 24.43111111111111,
"grad_norm": 8.225058555603027,
"learning_rate": 8.84314469047703e-05,
"loss": 3.2518,
"step": 5497
},
{
"epoch": 24.533333333333335,
"grad_norm": 7.0223236083984375,
"learning_rate": 8.832534369793153e-05,
"loss": 3.2102,
"step": 5520
},
{
"epoch": 24.635555555555555,
"grad_norm": 7.004525661468506,
"learning_rate": 8.821882041054239e-05,
"loss": 3.1674,
"step": 5543
},
{
"epoch": 24.73777777777778,
"grad_norm": 7.304614543914795,
"learning_rate": 8.811187821019486e-05,
"loss": 3.188,
"step": 5566
},
{
"epoch": 24.84,
"grad_norm": 6.002228736877441,
"learning_rate": 8.800451826907245e-05,
"loss": 3.1785,
"step": 5589
},
{
"epoch": 24.942222222222224,
"grad_norm": 6.998710632324219,
"learning_rate": 8.789674176393761e-05,
"loss": 3.1713,
"step": 5612
},
{
"epoch": 25.044444444444444,
"grad_norm": 7.029483795166016,
"learning_rate": 8.77885498761186e-05,
"loss": 3.1521,
"step": 5635
},
{
"epoch": 25.14666666666667,
"grad_norm": 6.024033069610596,
"learning_rate": 8.767994379149675e-05,
"loss": 3.0885,
"step": 5658
},
{
"epoch": 25.24888888888889,
"grad_norm": 7.233892440795898,
"learning_rate": 8.757092470049329e-05,
"loss": 3.0891,
"step": 5681
},
{
"epoch": 25.351111111111113,
"grad_norm": 7.917546272277832,
"learning_rate": 8.74614937980564e-05,
"loss": 3.1085,
"step": 5704
},
{
"epoch": 25.453333333333333,
"grad_norm": 7.8942437171936035,
"learning_rate": 8.735165228364809e-05,
"loss": 3.0931,
"step": 5727
},
{
"epoch": 25.555555555555557,
"grad_norm": 7.184880256652832,
"learning_rate": 8.724140136123106e-05,
"loss": 3.1079,
"step": 5750
},
{
"epoch": 25.657777777777778,
"grad_norm": 5.8746137619018555,
"learning_rate": 8.713074223925546e-05,
"loss": 3.0924,
"step": 5773
},
{
"epoch": 25.76,
"grad_norm": 6.722870826721191,
"learning_rate": 8.701967613064575e-05,
"loss": 3.0918,
"step": 5796
},
{
"epoch": 25.862222222222222,
"grad_norm": 8.777771949768066,
"learning_rate": 8.690820425278721e-05,
"loss": 3.1046,
"step": 5819
},
{
"epoch": 25.964444444444446,
"grad_norm": 7.208896636962891,
"learning_rate": 8.679632782751283e-05,
"loss": 3.1053,
"step": 5842
},
{
"epoch": 26.066666666666666,
"grad_norm": 12.179716110229492,
"learning_rate": 8.668404808108978e-05,
"loss": 3.034,
"step": 5865
},
{
"epoch": 26.16888888888889,
"grad_norm": 6.9270501136779785,
"learning_rate": 8.657136624420596e-05,
"loss": 2.982,
"step": 5888
},
{
"epoch": 26.27111111111111,
"grad_norm": 6.495911598205566,
"learning_rate": 8.645828355195658e-05,
"loss": 2.9953,
"step": 5911
},
{
"epoch": 26.373333333333335,
"grad_norm": 6.193568229675293,
"learning_rate": 8.634480124383057e-05,
"loss": 3.0264,
"step": 5934
},
{
"epoch": 26.475555555555555,
"grad_norm": 7.5366034507751465,
"learning_rate": 8.623092056369704e-05,
"loss": 3.029,
"step": 5957
},
{
"epoch": 26.57777777777778,
"grad_norm": 7.380651473999023,
"learning_rate": 8.611664275979157e-05,
"loss": 3.0148,
"step": 5980
},
{
"epoch": 26.68,
"grad_norm": 6.579084396362305,
"learning_rate": 8.600196908470265e-05,
"loss": 3.0019,
"step": 6003
},
{
"epoch": 26.782222222222224,
"grad_norm": 7.964267253875732,
"learning_rate": 8.588690079535779e-05,
"loss": 3.0102,
"step": 6026
},
{
"epoch": 26.884444444444444,
"grad_norm": 7.465826034545898,
"learning_rate": 8.577143915300993e-05,
"loss": 2.9759,
"step": 6049
},
{
"epoch": 26.986666666666668,
"grad_norm": 6.584536552429199,
"learning_rate": 8.56555854232234e-05,
"loss": 2.9609,
"step": 6072
},
{
"epoch": 27.08888888888889,
"grad_norm": 6.6631550788879395,
"learning_rate": 8.553934087586026e-05,
"loss": 2.8921,
"step": 6095
},
{
"epoch": 27.191111111111113,
"grad_norm": 7.030783176422119,
"learning_rate": 8.542270678506625e-05,
"loss": 2.8946,
"step": 6118
},
{
"epoch": 27.293333333333333,
"grad_norm": 6.412444114685059,
"learning_rate": 8.530568442925684e-05,
"loss": 2.9002,
"step": 6141
},
{
"epoch": 27.395555555555557,
"grad_norm": 8.111526489257812,
"learning_rate": 8.518827509110328e-05,
"loss": 2.9037,
"step": 6164
},
{
"epoch": 27.497777777777777,
"grad_norm": 6.402091026306152,
"learning_rate": 8.507048005751847e-05,
"loss": 2.9006,
"step": 6187
},
{
"epoch": 27.6,
"grad_norm": 7.210970878601074,
"learning_rate": 8.495230061964288e-05,
"loss": 2.911,
"step": 6210
},
{
"epoch": 27.702222222222222,
"grad_norm": 9.301465034484863,
"learning_rate": 8.48337380728304e-05,
"loss": 2.915,
"step": 6233
},
{
"epoch": 27.804444444444446,
"grad_norm": 10.22038745880127,
"learning_rate": 8.471479371663417e-05,
"loss": 2.9234,
"step": 6256
},
{
"epoch": 27.906666666666666,
"grad_norm": 8.557666778564453,
"learning_rate": 8.459546885479226e-05,
"loss": 2.9312,
"step": 6279
},
{
"epoch": 28.00888888888889,
"grad_norm": 8.308337211608887,
"learning_rate": 8.447576479521348e-05,
"loss": 2.9055,
"step": 6302
},
{
"epoch": 28.11111111111111,
"grad_norm": 9.826993942260742,
"learning_rate": 8.435568284996294e-05,
"loss": 2.795,
"step": 6325
},
{
"epoch": 28.213333333333335,
"grad_norm": 7.39091157913208,
"learning_rate": 8.423522433524776e-05,
"loss": 2.7985,
"step": 6348
},
{
"epoch": 28.315555555555555,
"grad_norm": 7.943458557128906,
"learning_rate": 8.411439057140257e-05,
"loss": 2.804,
"step": 6371
},
{
"epoch": 28.41777777777778,
"grad_norm": 7.037588119506836,
"learning_rate": 8.399318288287512e-05,
"loss": 2.8196,
"step": 6394
},
{
"epoch": 28.52,
"grad_norm": 6.966550350189209,
"learning_rate": 8.387160259821166e-05,
"loss": 2.8037,
"step": 6417
},
{
"epoch": 28.622222222222224,
"grad_norm": 6.990281105041504,
"learning_rate": 8.374965105004244e-05,
"loss": 2.8049,
"step": 6440
},
{
"epoch": 28.724444444444444,
"grad_norm": 8.029483795166016,
"learning_rate": 8.362732957506714e-05,
"loss": 2.8056,
"step": 6463
},
{
"epoch": 28.826666666666668,
"grad_norm": 6.398525714874268,
"learning_rate": 8.350463951404012e-05,
"loss": 2.8254,
"step": 6486
},
{
"epoch": 28.92888888888889,
"grad_norm": 9.660991668701172,
"learning_rate": 8.338158221175581e-05,
"loss": 2.8516,
"step": 6509
},
{
"epoch": 29.031111111111112,
"grad_norm": 7.429766654968262,
"learning_rate": 8.325815901703394e-05,
"loss": 2.8115,
"step": 6532
},
{
"epoch": 29.133333333333333,
"grad_norm": 6.842705726623535,
"learning_rate": 8.313437128270469e-05,
"loss": 2.7238,
"step": 6555
},
{
"epoch": 29.235555555555557,
"grad_norm": 9.195459365844727,
"learning_rate": 8.301022036559405e-05,
"loss": 2.7192,
"step": 6578
},
{
"epoch": 29.337777777777777,
"grad_norm": 7.685567378997803,
"learning_rate": 8.288570762650869e-05,
"loss": 2.7009,
"step": 6601
},
{
"epoch": 29.44,
"grad_norm": 6.384602069854736,
"learning_rate": 8.276083443022126e-05,
"loss": 2.7286,
"step": 6624
},
{
"epoch": 29.54222222222222,
"grad_norm": 7.564410209655762,
"learning_rate": 8.263560214545532e-05,
"loss": 2.7405,
"step": 6647
},
{
"epoch": 29.644444444444446,
"grad_norm": 6.835319995880127,
"learning_rate": 8.251001214487039e-05,
"loss": 2.7197,
"step": 6670
},
{
"epoch": 29.746666666666666,
"grad_norm": 7.009396553039551,
"learning_rate": 8.238406580504683e-05,
"loss": 2.7322,
"step": 6693
},
{
"epoch": 29.84888888888889,
"grad_norm": 6.862404823303223,
"learning_rate": 8.225776450647082e-05,
"loss": 2.7476,
"step": 6716
},
{
"epoch": 29.95111111111111,
"grad_norm": 6.345396041870117,
"learning_rate": 8.213110963351928e-05,
"loss": 2.7317,
"step": 6739
},
{
"epoch": 30.053333333333335,
"grad_norm": 7.607011795043945,
"learning_rate": 8.200410257444451e-05,
"loss": 2.6859,
"step": 6762
},
{
"epoch": 30.155555555555555,
"grad_norm": 6.952041149139404,
"learning_rate": 8.187674472135915e-05,
"loss": 2.6587,
"step": 6785
},
{
"epoch": 30.25777777777778,
"grad_norm": 6.717074394226074,
"learning_rate": 8.17490374702209e-05,
"loss": 2.6636,
"step": 6808
},
{
"epoch": 30.36,
"grad_norm": 7.299156665802002,
"learning_rate": 8.162098222081711e-05,
"loss": 2.6731,
"step": 6831
},
{
"epoch": 30.462222222222223,
"grad_norm": 7.86132287979126,
"learning_rate": 8.149258037674952e-05,
"loss": 2.6568,
"step": 6854
},
{
"epoch": 30.564444444444444,
"grad_norm": 6.957241535186768,
"learning_rate": 8.13638333454189e-05,
"loss": 2.621,
"step": 6877
},
{
"epoch": 30.666666666666668,
"grad_norm": 7.0929741859436035,
"learning_rate": 8.123474253800957e-05,
"loss": 2.6453,
"step": 6900
},
{
"epoch": 30.76888888888889,
"grad_norm": 7.3665385246276855,
"learning_rate": 8.110530936947392e-05,
"loss": 2.6668,
"step": 6923
},
{
"epoch": 30.871111111111112,
"grad_norm": 8.744823455810547,
"learning_rate": 8.097553525851693e-05,
"loss": 2.6759,
"step": 6946
},
{
"epoch": 30.973333333333333,
"grad_norm": 6.603512287139893,
"learning_rate": 8.084542162758067e-05,
"loss": 2.6677,
"step": 6969
},
{
"epoch": 31.075555555555557,
"grad_norm": 6.355960369110107,
"learning_rate": 8.071496990282861e-05,
"loss": 2.6044,
"step": 6992
},
{
"epoch": 31.177777777777777,
"grad_norm": 6.957365989685059,
"learning_rate": 8.058418151413005e-05,
"loss": 2.5647,
"step": 7015
},
{
"epoch": 31.28,
"grad_norm": 7.455416679382324,
"learning_rate": 8.045305789504444e-05,
"loss": 2.5981,
"step": 7038
},
{
"epoch": 31.38222222222222,
"grad_norm": 6.41038703918457,
"learning_rate": 8.032160048280566e-05,
"loss": 2.6026,
"step": 7061
},
{
"epoch": 31.484444444444446,
"grad_norm": 8.298896789550781,
"learning_rate": 8.018981071830622e-05,
"loss": 2.5975,
"step": 7084
},
{
"epoch": 31.586666666666666,
"grad_norm": 9.506787300109863,
"learning_rate": 8.005769004608156e-05,
"loss": 2.6356,
"step": 7107
},
{
"epoch": 31.68888888888889,
"grad_norm": 8.870840072631836,
"learning_rate": 7.992523991429419e-05,
"loss": 2.6015,
"step": 7130
},
{
"epoch": 31.79111111111111,
"grad_norm": 8.160204887390137,
"learning_rate": 7.979246177471773e-05,
"loss": 2.593,
"step": 7153
},
{
"epoch": 31.893333333333334,
"grad_norm": 6.366309642791748,
"learning_rate": 7.96593570827211e-05,
"loss": 2.5548,
"step": 7176
},
{
"epoch": 31.995555555555555,
"grad_norm": 6.812814712524414,
"learning_rate": 7.952592729725254e-05,
"loss": 2.5352,
"step": 7199
},
{
"epoch": 32.09777777777778,
"grad_norm": 6.476632118225098,
"learning_rate": 7.939217388082361e-05,
"loss": 2.4694,
"step": 7222
},
{
"epoch": 32.2,
"grad_norm": 7.325323104858398,
"learning_rate": 7.925809829949312e-05,
"loss": 2.4581,
"step": 7245
},
{
"epoch": 32.30222222222222,
"grad_norm": 7.190999984741211,
"learning_rate": 7.912370202285113e-05,
"loss": 2.4829,
"step": 7268
},
{
"epoch": 32.404444444444444,
"grad_norm": 7.949245452880859,
"learning_rate": 7.898898652400281e-05,
"loss": 2.5134,
"step": 7291
},
{
"epoch": 32.50666666666667,
"grad_norm": 7.711633682250977,
"learning_rate": 7.88539532795523e-05,
"loss": 2.5374,
"step": 7314
},
{
"epoch": 32.60888888888889,
"grad_norm": 7.286764621734619,
"learning_rate": 7.87186037695865e-05,
"loss": 2.4946,
"step": 7337
},
{
"epoch": 32.71111111111111,
"grad_norm": 7.322375774383545,
"learning_rate": 7.858293947765892e-05,
"loss": 2.5086,
"step": 7360
},
{
"epoch": 32.81333333333333,
"grad_norm": 7.134939670562744,
"learning_rate": 7.844696189077328e-05,
"loss": 2.4963,
"step": 7383
},
{
"epoch": 32.91555555555556,
"grad_norm": 7.648177623748779,
"learning_rate": 7.831067249936734e-05,
"loss": 2.4857,
"step": 7406
},
{
"epoch": 33.01777777777778,
"grad_norm": 6.730453968048096,
"learning_rate": 7.817407279729657e-05,
"loss": 2.4906,
"step": 7429
},
{
"epoch": 33.12,
"grad_norm": 6.662753105163574,
"learning_rate": 7.803716428181763e-05,
"loss": 2.4054,
"step": 7452
},
{
"epoch": 33.22222222222222,
"grad_norm": 6.583335876464844,
"learning_rate": 7.789994845357212e-05,
"loss": 2.3762,
"step": 7475
},
{
"epoch": 33.324444444444445,
"grad_norm": 6.661638259887695,
"learning_rate": 7.776242681657006e-05,
"loss": 2.4166,
"step": 7498
},
{
"epoch": 33.42666666666667,
"grad_norm": 6.506235599517822,
"learning_rate": 7.762460087817343e-05,
"loss": 2.4081,
"step": 7521
},
{
"epoch": 33.528888888888886,
"grad_norm": 8.114941596984863,
"learning_rate": 7.748647214907954e-05,
"loss": 2.4189,
"step": 7544
},
{
"epoch": 33.63111111111111,
"grad_norm": 7.059467315673828,
"learning_rate": 7.73480421433047e-05,
"loss": 2.4416,
"step": 7567
},
{
"epoch": 33.733333333333334,
"grad_norm": 9.18146800994873,
"learning_rate": 7.720931237816735e-05,
"loss": 2.4374,
"step": 7590
},
{
"epoch": 33.83555555555556,
"grad_norm": 7.458983898162842,
"learning_rate": 7.707028437427164e-05,
"loss": 2.4392,
"step": 7613
},
{
"epoch": 33.937777777777775,
"grad_norm": 6.761877536773682,
"learning_rate": 7.693095965549069e-05,
"loss": 2.4354,
"step": 7636
},
{
"epoch": 34.04,
"grad_norm": 7.720556735992432,
"learning_rate": 7.679133974894983e-05,
"loss": 2.3844,
"step": 7659
},
{
"epoch": 34.14222222222222,
"grad_norm": 6.558327674865723,
"learning_rate": 7.665142618501e-05,
"loss": 2.3599,
"step": 7682
},
{
"epoch": 34.24444444444445,
"grad_norm": 6.790546894073486,
"learning_rate": 7.651122049725082e-05,
"loss": 2.3541,
"step": 7705
},
{
"epoch": 34.346666666666664,
"grad_norm": 6.559151649475098,
"learning_rate": 7.637072422245386e-05,
"loss": 2.3684,
"step": 7728
},
{
"epoch": 34.44888888888889,
"grad_norm": 8.255489349365234,
"learning_rate": 7.622993890058582e-05,
"loss": 2.3799,
"step": 7751
},
{
"epoch": 34.55111111111111,
"grad_norm": 8.185545921325684,
"learning_rate": 7.60888660747816e-05,
"loss": 2.3723,
"step": 7774
},
{
"epoch": 34.653333333333336,
"grad_norm": 7.4899516105651855,
"learning_rate": 7.594750729132743e-05,
"loss": 2.3813,
"step": 7797
},
{
"epoch": 34.75555555555555,
"grad_norm": 6.652093887329102,
"learning_rate": 7.580586409964382e-05,
"loss": 2.3641,
"step": 7820
},
{
"epoch": 34.85777777777778,
"grad_norm": 6.916318893432617,
"learning_rate": 7.566393805226874e-05,
"loss": 2.3689,
"step": 7843
},
{
"epoch": 34.96,
"grad_norm": 7.0521559715271,
"learning_rate": 7.552173070484048e-05,
"loss": 2.3528,
"step": 7866
},
{
"epoch": 35.062222222222225,
"grad_norm": 7.043063163757324,
"learning_rate": 7.537924361608062e-05,
"loss": 2.2977,
"step": 7889
},
{
"epoch": 35.16444444444444,
"grad_norm": 6.285613059997559,
"learning_rate": 7.523647834777698e-05,
"loss": 2.2593,
"step": 7912
},
{
"epoch": 35.266666666666666,
"grad_norm": 7.13001012802124,
"learning_rate": 7.509343646476646e-05,
"loss": 2.268,
"step": 7935
},
{
"epoch": 35.36888888888889,
"grad_norm": 6.38799524307251,
"learning_rate": 7.495011953491793e-05,
"loss": 2.291,
"step": 7958
},
{
"epoch": 35.471111111111114,
"grad_norm": 7.488864421844482,
"learning_rate": 7.480652912911501e-05,
"loss": 2.3234,
"step": 7981
},
{
"epoch": 35.57333333333333,
"grad_norm": 6.8178558349609375,
"learning_rate": 7.466266682123888e-05,
"loss": 2.3204,
"step": 8004
},
{
"epoch": 35.675555555555555,
"grad_norm": 7.1541748046875,
"learning_rate": 7.451853418815097e-05,
"loss": 2.3137,
"step": 8027
},
{
"epoch": 35.77777777777778,
"grad_norm": 8.040066719055176,
"learning_rate": 7.437413280967578e-05,
"loss": 2.3173,
"step": 8050
},
{
"epoch": 35.88,
"grad_norm": 8.158806800842285,
"learning_rate": 7.422946426858345e-05,
"loss": 2.2952,
"step": 8073
},
{
"epoch": 35.98222222222222,
"grad_norm": 7.60796594619751,
"learning_rate": 7.408453015057252e-05,
"loss": 2.2707,
"step": 8096
},
{
"epoch": 36.08444444444444,
"grad_norm": 6.903555870056152,
"learning_rate": 7.393933204425244e-05,
"loss": 2.2153,
"step": 8119
},
{
"epoch": 36.18666666666667,
"grad_norm": 7.1362624168396,
"learning_rate": 7.379387154112625e-05,
"loss": 2.2045,
"step": 8142
},
{
"epoch": 36.28888888888889,
"grad_norm": 7.824875354766846,
"learning_rate": 7.364815023557306e-05,
"loss": 2.215,
"step": 8165
},
{
"epoch": 36.39111111111111,
"grad_norm": 10.668073654174805,
"learning_rate": 7.350216972483064e-05,
"loss": 2.2303,
"step": 8188
},
{
"epoch": 36.49333333333333,
"grad_norm": 5.577554225921631,
"learning_rate": 7.33559316089779e-05,
"loss": 2.2175,
"step": 8211
},
{
"epoch": 36.595555555555556,
"grad_norm": 6.902368545532227,
"learning_rate": 7.320943749091728e-05,
"loss": 2.2207,
"step": 8234
},
{
"epoch": 36.69777777777778,
"grad_norm": 6.997749328613281,
"learning_rate": 7.30626889763573e-05,
"loss": 2.2525,
"step": 8257
},
{
"epoch": 36.8,
"grad_norm": 7.666829586029053,
"learning_rate": 7.291568767379484e-05,
"loss": 2.2427,
"step": 8280
},
{
"epoch": 36.90222222222222,
"grad_norm": 6.811129093170166,
"learning_rate": 7.27684351944976e-05,
"loss": 2.25,
"step": 8303
},
{
"epoch": 37.004444444444445,
"grad_norm": 5.935613632202148,
"learning_rate": 7.262093315248641e-05,
"loss": 2.2459,
"step": 8326
},
{
"epoch": 37.10666666666667,
"grad_norm": 6.339777946472168,
"learning_rate": 7.24731831645175e-05,
"loss": 2.167,
"step": 8349
},
{
"epoch": 37.208888888888886,
"grad_norm": 7.560238361358643,
"learning_rate": 7.232518685006485e-05,
"loss": 2.1952,
"step": 8372
},
{
"epoch": 37.31111111111111,
"grad_norm": 6.586178779602051,
"learning_rate": 7.21769458313024e-05,
"loss": 2.1791,
"step": 8395
},
{
"epoch": 37.413333333333334,
"grad_norm": 7.019660949707031,
"learning_rate": 7.20284617330862e-05,
"loss": 2.1754,
"step": 8418
},
{
"epoch": 37.51555555555556,
"grad_norm": 7.03871488571167,
"learning_rate": 7.187973618293678e-05,
"loss": 2.1585,
"step": 8441
},
{
"epoch": 37.617777777777775,
"grad_norm": 6.066256046295166,
"learning_rate": 7.173077081102114e-05,
"loss": 2.1424,
"step": 8464
},
{
"epoch": 37.72,
"grad_norm": 6.991265773773193,
"learning_rate": 7.158156725013493e-05,
"loss": 2.1577,
"step": 8487
},
{
"epoch": 37.82222222222222,
"grad_norm": 8.248811721801758,
"learning_rate": 7.14321271356846e-05,
"loss": 2.1603,
"step": 8510
},
{
"epoch": 37.92444444444445,
"grad_norm": 8.15676212310791,
"learning_rate": 7.128245210566947e-05,
"loss": 2.1695,
"step": 8533
},
{
"epoch": 38.026666666666664,
"grad_norm": 7.107559680938721,
"learning_rate": 7.113254380066367e-05,
"loss": 2.1488,
"step": 8556
},
{
"epoch": 38.12888888888889,
"grad_norm": 8.755867004394531,
"learning_rate": 7.098240386379831e-05,
"loss": 2.1009,
"step": 8579
},
{
"epoch": 38.23111111111111,
"grad_norm": 7.037129878997803,
"learning_rate": 7.083203394074334e-05,
"loss": 2.0954,
"step": 8602
},
{
"epoch": 38.333333333333336,
"grad_norm": 6.437880039215088,
"learning_rate": 7.068143567968957e-05,
"loss": 2.085,
"step": 8625
},
{
"epoch": 38.43555555555555,
"grad_norm": 10.530925750732422,
"learning_rate": 7.053061073133067e-05,
"loss": 2.1242,
"step": 8648
},
{
"epoch": 38.53777777777778,
"grad_norm": 7.10654878616333,
"learning_rate": 7.037956074884493e-05,
"loss": 2.1354,
"step": 8671
},
{
"epoch": 38.64,
"grad_norm": 6.740297794342041,
"learning_rate": 7.022828738787724e-05,
"loss": 2.1365,
"step": 8694
},
{
"epoch": 38.742222222222225,
"grad_norm": 7.16520357131958,
"learning_rate": 7.007679230652095e-05,
"loss": 2.1163,
"step": 8717
},
{
"epoch": 38.84444444444444,
"grad_norm": 7.305176258087158,
"learning_rate": 6.992507716529965e-05,
"loss": 2.1429,
"step": 8740
},
{
"epoch": 38.946666666666665,
"grad_norm": 5.924234390258789,
"learning_rate": 6.977314362714898e-05,
"loss": 2.1132,
"step": 8763
},
{
"epoch": 39.04888888888889,
"grad_norm": 8.262660026550293,
"learning_rate": 6.962099335739837e-05,
"loss": 2.0614,
"step": 8786
},
{
"epoch": 39.15111111111111,
"grad_norm": 7.352762699127197,
"learning_rate": 6.946862802375292e-05,
"loss": 2.0194,
"step": 8809
},
{
"epoch": 39.25333333333333,
"grad_norm": 6.5161824226379395,
"learning_rate": 6.931604929627495e-05,
"loss": 2.0356,
"step": 8832
},
{
"epoch": 39.355555555555554,
"grad_norm": 6.718994140625,
"learning_rate": 6.916325884736576e-05,
"loss": 2.0442,
"step": 8855
},
{
"epoch": 39.45777777777778,
"grad_norm": 6.267631530761719,
"learning_rate": 6.901025835174739e-05,
"loss": 2.0456,
"step": 8878
},
{
"epoch": 39.56,
"grad_norm": 6.105040550231934,
"learning_rate": 6.885704948644411e-05,
"loss": 2.0319,
"step": 8901
},
{
"epoch": 39.66222222222222,
"grad_norm": 6.807146072387695,
"learning_rate": 6.870363393076413e-05,
"loss": 2.051,
"step": 8924
},
{
"epoch": 39.76444444444444,
"grad_norm": 6.0141987800598145,
"learning_rate": 6.855001336628118e-05,
"loss": 2.0376,
"step": 8947
},
{
"epoch": 39.86666666666667,
"grad_norm": 7.84182596206665,
"learning_rate": 6.839618947681609e-05,
"loss": 2.0596,
"step": 8970
},
{
"epoch": 39.96888888888889,
"grad_norm": 8.566624641418457,
"learning_rate": 6.824216394841825e-05,
"loss": 2.0607,
"step": 8993
},
{
"epoch": 40.07111111111111,
"grad_norm": 6.4133992195129395,
"learning_rate": 6.808793846934729e-05,
"loss": 1.9994,
"step": 9016
},
{
"epoch": 40.17333333333333,
"grad_norm": 10.160492897033691,
"learning_rate": 6.79335147300544e-05,
"loss": 1.9999,
"step": 9039
},
{
"epoch": 40.275555555555556,
"grad_norm": 6.391870021820068,
"learning_rate": 6.777889442316394e-05,
"loss": 1.9972,
"step": 9062
},
{
"epoch": 40.37777777777778,
"grad_norm": 9.107426643371582,
"learning_rate": 6.762407924345479e-05,
"loss": 1.9891,
"step": 9085
},
{
"epoch": 40.48,
"grad_norm": 6.959272861480713,
"learning_rate": 6.746907088784182e-05,
"loss": 1.9765,
"step": 9108
},
{
"epoch": 40.58222222222222,
"grad_norm": 6.614034175872803,
"learning_rate": 6.73138710553573e-05,
"loss": 1.993,
"step": 9131
},
{
"epoch": 40.684444444444445,
"grad_norm": 7.331613063812256,
"learning_rate": 6.715848144713227e-05,
"loss": 1.9826,
"step": 9154
},
{
"epoch": 40.78666666666667,
"grad_norm": 8.619832992553711,
"learning_rate": 6.700290376637782e-05,
"loss": 2.0247,
"step": 9177
},
{
"epoch": 40.888888888888886,
"grad_norm": 7.282753944396973,
"learning_rate": 6.684713971836656e-05,
"loss": 2.0123,
"step": 9200
},
{
"epoch": 40.99111111111111,
"grad_norm": 7.198232173919678,
"learning_rate": 6.669119101041383e-05,
"loss": 2.0095,
"step": 9223
},
{
"epoch": 41.093333333333334,
"grad_norm": 6.148073673248291,
"learning_rate": 6.6535059351859e-05,
"loss": 1.9284,
"step": 9246
},
{
"epoch": 41.19555555555556,
"grad_norm": 7.000942230224609,
"learning_rate": 6.637874645404673e-05,
"loss": 1.9308,
"step": 9269
},
{
"epoch": 41.297777777777775,
"grad_norm": 9.497756004333496,
"learning_rate": 6.622225403030828e-05,
"loss": 1.9316,
"step": 9292
},
{
"epoch": 41.4,
"grad_norm": 6.189666748046875,
"learning_rate": 6.606558379594262e-05,
"loss": 1.9304,
"step": 9315
},
{
"epoch": 41.50222222222222,
"grad_norm": 6.823606014251709,
"learning_rate": 6.590873746819772e-05,
"loss": 1.9582,
"step": 9338
},
{
"epoch": 41.60444444444445,
"grad_norm": 6.261486530303955,
"learning_rate": 6.575171676625169e-05,
"loss": 1.9322,
"step": 9361
},
{
"epoch": 41.70666666666666,
"grad_norm": 6.920318603515625,
"learning_rate": 6.559452341119389e-05,
"loss": 1.9533,
"step": 9384
},
{
"epoch": 41.80888888888889,
"grad_norm": 7.246551513671875,
"learning_rate": 6.543715912600621e-05,
"loss": 1.9548,
"step": 9407
},
{
"epoch": 41.91111111111111,
"grad_norm": 6.377082824707031,
"learning_rate": 6.527962563554402e-05,
"loss": 1.9709,
"step": 9430
},
{
"epoch": 42.013333333333335,
"grad_norm": 7.362649440765381,
"learning_rate": 6.512192466651735e-05,
"loss": 1.9402,
"step": 9453
},
{
"epoch": 42.11555555555555,
"grad_norm": 9.08193588256836,
"learning_rate": 6.496405794747193e-05,
"loss": 1.8674,
"step": 9476
},
{
"epoch": 42.217777777777776,
"grad_norm": 6.658238410949707,
"learning_rate": 6.480602720877029e-05,
"loss": 1.8556,
"step": 9499
},
{
"epoch": 42.32,
"grad_norm": 6.951099395751953,
"learning_rate": 6.464783418257277e-05,
"loss": 1.8759,
"step": 9522
},
{
"epoch": 42.422222222222224,
"grad_norm": 8.758234977722168,
"learning_rate": 6.448948060281847e-05,
"loss": 1.8712,
"step": 9545
},
{
"epoch": 42.52444444444444,
"grad_norm": 6.225131988525391,
"learning_rate": 6.433096820520639e-05,
"loss": 1.8857,
"step": 9568
},
{
"epoch": 42.626666666666665,
"grad_norm": 7.351943492889404,
"learning_rate": 6.417229872717624e-05,
"loss": 1.8809,
"step": 9591
},
{
"epoch": 42.72888888888889,
"grad_norm": 7.482339859008789,
"learning_rate": 6.401347390788952e-05,
"loss": 1.8694,
"step": 9614
},
{
"epoch": 42.83111111111111,
"grad_norm": 6.971664905548096,
"learning_rate": 6.385449548821037e-05,
"loss": 1.8744,
"step": 9637
},
{
"epoch": 42.93333333333333,
"grad_norm": 6.296336650848389,
"learning_rate": 6.36953652106866e-05,
"loss": 1.8966,
"step": 9660
},
{
"epoch": 43.035555555555554,
"grad_norm": 6.986079216003418,
"learning_rate": 6.353608481953042e-05,
"loss": 1.8555,
"step": 9683
},
{
"epoch": 43.13777777777778,
"grad_norm": 5.542973041534424,
"learning_rate": 6.337665606059953e-05,
"loss": 1.8185,
"step": 9706
},
{
"epoch": 43.24,
"grad_norm": 7.133216381072998,
"learning_rate": 6.321708068137779e-05,
"loss": 1.8241,
"step": 9729
},
{
"epoch": 43.34222222222222,
"grad_norm": 6.318929672241211,
"learning_rate": 6.305736043095619e-05,
"loss": 1.8372,
"step": 9752
},
{
"epoch": 43.44444444444444,
"grad_norm": 6.268241882324219,
"learning_rate": 6.289749706001365e-05,
"loss": 1.8602,
"step": 9775
},
{
"epoch": 43.54666666666667,
"grad_norm": 5.881213665008545,
"learning_rate": 6.273749232079778e-05,
"loss": 1.8439,
"step": 9798
},
{
"epoch": 43.64888888888889,
"grad_norm": 6.6124186515808105,
"learning_rate": 6.257734796710575e-05,
"loss": 1.8428,
"step": 9821
},
{
"epoch": 43.75111111111111,
"grad_norm": 7.996447563171387,
"learning_rate": 6.241706575426504e-05,
"loss": 1.8354,
"step": 9844
},
{
"epoch": 43.85333333333333,
"grad_norm": 7.1598639488220215,
"learning_rate": 6.225664743911414e-05,
"loss": 1.8185,
"step": 9867
},
{
"epoch": 43.955555555555556,
"grad_norm": 7.8854265213012695,
"learning_rate": 6.209609477998338e-05,
"loss": 1.832,
"step": 9890
},
{
"epoch": 44.05777777777778,
"grad_norm": 8.291993141174316,
"learning_rate": 6.193540953667564e-05,
"loss": 1.7871,
"step": 9913
},
{
"epoch": 44.16,
"grad_norm": 8.600836753845215,
"learning_rate": 6.177459347044703e-05,
"loss": 1.7882,
"step": 9936
},
{
"epoch": 44.26222222222222,
"grad_norm": 8.065147399902344,
"learning_rate": 6.161364834398755e-05,
"loss": 1.7799,
"step": 9959
},
{
"epoch": 44.364444444444445,
"grad_norm": 8.459796905517578,
"learning_rate": 6.145257592140188e-05,
"loss": 1.763,
"step": 9982
},
{
"epoch": 44.46666666666667,
"grad_norm": 6.006131649017334,
"learning_rate": 6.129137796818997e-05,
"loss": 1.7885,
"step": 10005
},
{
"epoch": 44.568888888888885,
"grad_norm": 8.034002304077148,
"learning_rate": 6.113005625122767e-05,
"loss": 1.8008,
"step": 10028
},
{
"epoch": 44.67111111111111,
"grad_norm": 6.57339334487915,
"learning_rate": 6.09686125387474e-05,
"loss": 1.786,
"step": 10051
},
{
"epoch": 44.77333333333333,
"grad_norm": 7.233739376068115,
"learning_rate": 6.080704860031879e-05,
"loss": 1.7973,
"step": 10074
},
{
"epoch": 44.87555555555556,
"grad_norm": 7.365921497344971,
"learning_rate": 6.0645366206829244e-05,
"loss": 1.8094,
"step": 10097
},
{
"epoch": 44.977777777777774,
"grad_norm": 7.772608280181885,
"learning_rate": 6.048356713046452e-05,
"loss": 1.7963,
"step": 10120
},
{
"epoch": 45.08,
"grad_norm": 6.320626258850098,
"learning_rate": 6.032165314468935e-05,
"loss": 1.7384,
"step": 10143
},
{
"epoch": 45.18222222222222,
"grad_norm": 6.214219093322754,
"learning_rate": 6.015962602422796e-05,
"loss": 1.7253,
"step": 10166
},
{
"epoch": 45.284444444444446,
"grad_norm": 6.484301567077637,
"learning_rate": 5.999748754504465e-05,
"loss": 1.7361,
"step": 10189
},
{
"epoch": 45.38666666666666,
"grad_norm": 8.989522933959961,
"learning_rate": 5.9835239484324304e-05,
"loss": 1.7443,
"step": 10212
},
{
"epoch": 45.48888888888889,
"grad_norm": 10.29185676574707,
"learning_rate": 5.967288362045291e-05,
"loss": 1.7423,
"step": 10235
},
{
"epoch": 45.59111111111111,
"grad_norm": 7.059528350830078,
"learning_rate": 5.951042173299811e-05,
"loss": 1.7292,
"step": 10258
},
{
"epoch": 45.693333333333335,
"grad_norm": 6.192359447479248,
"learning_rate": 5.9347855602689616e-05,
"loss": 1.7204,
"step": 10281
},
{
"epoch": 45.79555555555555,
"grad_norm": 6.398216247558594,
"learning_rate": 5.918518701139978e-05,
"loss": 1.7395,
"step": 10304
},
{
"epoch": 45.897777777777776,
"grad_norm": 6.21365213394165,
"learning_rate": 5.902241774212398e-05,
"loss": 1.7343,
"step": 10327
},
{
"epoch": 46.0,
"grad_norm": 6.119551658630371,
"learning_rate": 5.885954957896115e-05,
"loss": 1.7463,
"step": 10350
},
{
"epoch": 46.102222222222224,
"grad_norm": 5.506466865539551,
"learning_rate": 5.8696584307094146e-05,
"loss": 1.657,
"step": 10373
},
{
"epoch": 46.20444444444445,
"grad_norm": 6.575307369232178,
"learning_rate": 5.853352371277029e-05,
"loss": 1.6622,
"step": 10396
},
{
"epoch": 46.306666666666665,
"grad_norm": 6.451313018798828,
"learning_rate": 5.8370369583281634e-05,
"loss": 1.6861,
"step": 10419
},
{
"epoch": 46.40888888888889,
"grad_norm": 7.1156816482543945,
"learning_rate": 5.820712370694558e-05,
"loss": 1.6859,
"step": 10442
},
{
"epoch": 46.51111111111111,
"grad_norm": 6.124991416931152,
"learning_rate": 5.8043787873085044e-05,
"loss": 1.6763,
"step": 10465
},
{
"epoch": 46.61333333333333,
"grad_norm": 8.477898597717285,
"learning_rate": 5.7880363872009016e-05,
"loss": 1.6952,
"step": 10488
},
{
"epoch": 46.715555555555554,
"grad_norm": 7.237541198730469,
"learning_rate": 5.771685349499288e-05,
"loss": 1.676,
"step": 10511
},
{
"epoch": 46.81777777777778,
"grad_norm": 5.890578269958496,
"learning_rate": 5.7553258534258756e-05,
"loss": 1.6964,
"step": 10534
},
{
"epoch": 46.92,
"grad_norm": 6.47843074798584,
"learning_rate": 5.7389580782955896e-05,
"loss": 1.7098,
"step": 10557
},
{
"epoch": 47.022222222222226,
"grad_norm": 9.489853858947754,
"learning_rate": 5.722582203514099e-05,
"loss": 1.6894,
"step": 10580
},
{
"epoch": 47.12444444444444,
"grad_norm": 5.722830295562744,
"learning_rate": 5.7061984085758555e-05,
"loss": 1.6463,
"step": 10603
},
{
"epoch": 47.22666666666667,
"grad_norm": 5.548519134521484,
"learning_rate": 5.689806873062122e-05,
"loss": 1.6358,
"step": 10626
},
{
"epoch": 47.32888888888889,
"grad_norm": 5.543103218078613,
"learning_rate": 5.6734077766390023e-05,
"loss": 1.6249,
"step": 10649
},
{
"epoch": 47.431111111111115,
"grad_norm": 7.334754467010498,
"learning_rate": 5.6570012990554774e-05,
"loss": 1.6144,
"step": 10672
},
{
"epoch": 47.53333333333333,
"grad_norm": 6.74175500869751,
"learning_rate": 5.6405876201414334e-05,
"loss": 1.6413,
"step": 10695
},
{
"epoch": 47.635555555555555,
"grad_norm": 8.000964164733887,
"learning_rate": 5.624166919805686e-05,
"loss": 1.6583,
"step": 10718
},
{
"epoch": 47.73777777777778,
"grad_norm": 6.7785797119140625,
"learning_rate": 5.607739378034015e-05,
"loss": 1.6346,
"step": 10741
},
{
"epoch": 47.84,
"grad_norm": 8.0484619140625,
"learning_rate": 5.591305174887185e-05,
"loss": 1.6615,
"step": 10764
},
{
"epoch": 47.94222222222222,
"grad_norm": 6.589325428009033,
"learning_rate": 5.574864490498982e-05,
"loss": 1.6556,
"step": 10787
},
{
"epoch": 48.044444444444444,
"grad_norm": 5.7148942947387695,
"learning_rate": 5.558417505074226e-05,
"loss": 1.6129,
"step": 10810
},
{
"epoch": 48.14666666666667,
"grad_norm": 6.063688278198242,
"learning_rate": 5.541964398886805e-05,
"loss": 1.5707,
"step": 10833
},
{
"epoch": 48.24888888888889,
"grad_norm": 7.891332626342773,
"learning_rate": 5.525505352277695e-05,
"loss": 1.5966,
"step": 10856
},
{
"epoch": 48.35111111111111,
"grad_norm": 6.462911605834961,
"learning_rate": 5.509040545652984e-05,
"loss": 1.5979,
"step": 10879
},
{
"epoch": 48.45333333333333,
"grad_norm": 6.627693176269531,
"learning_rate": 5.492570159481897e-05,
"loss": 1.5835,
"step": 10902
},
{
"epoch": 48.55555555555556,
"grad_norm": 7.016481399536133,
"learning_rate": 5.4760943742948126e-05,
"loss": 1.6114,
"step": 10925
},
{
"epoch": 48.65777777777778,
"grad_norm": 6.203521251678467,
"learning_rate": 5.4596133706812925e-05,
"loss": 1.6261,
"step": 10948
},
{
"epoch": 48.76,
"grad_norm": 8.625542640686035,
"learning_rate": 5.443127329288092e-05,
"loss": 1.6152,
"step": 10971
},
{
"epoch": 48.86222222222222,
"grad_norm": 8.934986114501953,
"learning_rate": 5.426636430817189e-05,
"loss": 1.6155,
"step": 10994
},
{
"epoch": 48.964444444444446,
"grad_norm": 6.330492973327637,
"learning_rate": 5.4101408560237964e-05,
"loss": 1.624,
"step": 11017
},
{
"epoch": 49.06666666666667,
"grad_norm": 7.745333671569824,
"learning_rate": 5.393640785714386e-05,
"loss": 1.5832,
"step": 11040
},
{
"epoch": 49.16888888888889,
"grad_norm": 7.9969682693481445,
"learning_rate": 5.377136400744701e-05,
"loss": 1.5664,
"step": 11063
},
{
"epoch": 49.27111111111111,
"grad_norm": 6.262273788452148,
"learning_rate": 5.3606278820177824e-05,
"loss": 1.5464,
"step": 11086
},
{
"epoch": 49.373333333333335,
"grad_norm": 6.109494686126709,
"learning_rate": 5.344115410481977e-05,
"loss": 1.5242,
"step": 11109
},
{
"epoch": 49.47555555555556,
"grad_norm": 6.395167827606201,
"learning_rate": 5.3275991671289594e-05,
"loss": 1.5514,
"step": 11132
},
{
"epoch": 49.577777777777776,
"grad_norm": 8.812541961669922,
"learning_rate": 5.311079332991748e-05,
"loss": 1.527,
"step": 11155
},
{
"epoch": 49.68,
"grad_norm": 8.040874481201172,
"learning_rate": 5.294556089142716e-05,
"loss": 1.5469,
"step": 11178
},
{
"epoch": 49.782222222222224,
"grad_norm": 6.935076713562012,
"learning_rate": 5.278029616691613e-05,
"loss": 1.566,
"step": 11201
},
{
"epoch": 49.88444444444445,
"grad_norm": 7.0155181884765625,
"learning_rate": 5.261500096783577e-05,
"loss": 1.5642,
"step": 11224
},
{
"epoch": 49.986666666666665,
"grad_norm": 8.399476051330566,
"learning_rate": 5.2449677105971476e-05,
"loss": 1.5664,
"step": 11247
},
{
"epoch": 50.08888888888889,
"grad_norm": 6.229375839233398,
"learning_rate": 5.22843263934228e-05,
"loss": 1.5044,
"step": 11270
},
{
"epoch": 50.19111111111111,
"grad_norm": 8.590860366821289,
"learning_rate": 5.211895064258365e-05,
"loss": 1.5104,
"step": 11293
},
{
"epoch": 50.29333333333334,
"grad_norm": 6.563053607940674,
"learning_rate": 5.195355166612234e-05,
"loss": 1.5279,
"step": 11316
},
{
"epoch": 50.39555555555555,
"grad_norm": 6.139184474945068,
"learning_rate": 5.178813127696175e-05,
"loss": 1.5323,
"step": 11339
},
{
"epoch": 50.49777777777778,
"grad_norm": 6.862679958343506,
"learning_rate": 5.162269128825949e-05,
"loss": 1.526,
"step": 11362
},
{
"epoch": 50.6,
"grad_norm": 7.023072719573975,
"learning_rate": 5.1457233513387994e-05,
"loss": 1.5244,
"step": 11385
},
{
"epoch": 50.702222222222225,
"grad_norm": 6.219864368438721,
"learning_rate": 5.1291759765914625e-05,
"loss": 1.5333,
"step": 11408
},
{
"epoch": 50.80444444444444,
"grad_norm": 6.453531265258789,
"learning_rate": 5.112627185958184e-05,
"loss": 1.5319,
"step": 11431
},
{
"epoch": 50.906666666666666,
"grad_norm": 5.3879876136779785,
"learning_rate": 5.096077160828728e-05,
"loss": 1.5279,
"step": 11454
},
{
"epoch": 51.00888888888889,
"grad_norm": 6.174513339996338,
"learning_rate": 5.079526082606394e-05,
"loss": 1.5157,
"step": 11477
},
{
"epoch": 51.111111111111114,
"grad_norm": 8.612546920776367,
"learning_rate": 5.062974132706016e-05,
"loss": 1.4655,
"step": 11500
},
{
"epoch": 51.21333333333333,
"grad_norm": 6.833427429199219,
"learning_rate": 5.046421492551992e-05,
"loss": 1.4723,
"step": 11523
},
{
"epoch": 51.315555555555555,
"grad_norm": 6.863546371459961,
"learning_rate": 5.029868343576276e-05,
"loss": 1.4848,
"step": 11546
},
{
"epoch": 51.41777777777778,
"grad_norm": 7.937037467956543,
"learning_rate": 5.013314867216407e-05,
"loss": 1.4613,
"step": 11569
},
{
"epoch": 51.52,
"grad_norm": 6.1333699226379395,
"learning_rate": 4.996761244913508e-05,
"loss": 1.478,
"step": 11592
},
{
"epoch": 51.62222222222222,
"grad_norm": 9.617277145385742,
"learning_rate": 4.980207658110305e-05,
"loss": 1.4705,
"step": 11615
},
{
"epoch": 51.724444444444444,
"grad_norm": 6.086880207061768,
"learning_rate": 4.963654288249134e-05,
"loss": 1.4673,
"step": 11638
},
{
"epoch": 51.82666666666667,
"grad_norm": 5.924047470092773,
"learning_rate": 4.9471013167699476e-05,
"loss": 1.4855,
"step": 11661
},
{
"epoch": 51.92888888888889,
"grad_norm": 5.790915489196777,
"learning_rate": 4.930548925108342e-05,
"loss": 1.4879,
"step": 11684
},
{
"epoch": 52.03111111111111,
"grad_norm": 10.055533409118652,
"learning_rate": 4.913997294693547e-05,
"loss": 1.4776,
"step": 11707
},
{
"epoch": 52.13333333333333,
"grad_norm": 5.994448661804199,
"learning_rate": 4.8974466069464586e-05,
"loss": 1.4281,
"step": 11730
},
{
"epoch": 52.23555555555556,
"grad_norm": 6.34792947769165,
"learning_rate": 4.880897043277632e-05,
"loss": 1.4232,
"step": 11753
},
{
"epoch": 52.33777777777778,
"grad_norm": 6.8388285636901855,
"learning_rate": 4.8643487850853093e-05,
"loss": 1.4415,
"step": 11776
},
{
"epoch": 52.44,
"grad_norm": 6.194220542907715,
"learning_rate": 4.847802013753414e-05,
"loss": 1.4363,
"step": 11799
},
{
"epoch": 52.54222222222222,
"grad_norm": 7.254870891571045,
"learning_rate": 4.831256910649582e-05,
"loss": 1.445,
"step": 11822
},
{
"epoch": 52.644444444444446,
"grad_norm": 6.243785858154297,
"learning_rate": 4.814713657123158e-05,
"loss": 1.4399,
"step": 11845
},
{
"epoch": 52.74666666666667,
"grad_norm": 7.5753607749938965,
"learning_rate": 4.798172434503213e-05,
"loss": 1.4521,
"step": 11868
},
{
"epoch": 52.84888888888889,
"grad_norm": 6.7162861824035645,
"learning_rate": 4.781633424096562e-05,
"loss": 1.4446,
"step": 11891
},
{
"epoch": 52.95111111111111,
"grad_norm": 8.405692100524902,
"learning_rate": 4.765096807185767e-05,
"loss": 1.4712,
"step": 11914
},
{
"epoch": 53.053333333333335,
"grad_norm": 5.832555294036865,
"learning_rate": 4.748562765027162e-05,
"loss": 1.4306,
"step": 11937
},
{
"epoch": 53.15555555555556,
"grad_norm": 5.443018436431885,
"learning_rate": 4.7320314788488496e-05,
"loss": 1.3977,
"step": 11960
},
{
"epoch": 53.257777777777775,
"grad_norm": 6.506402969360352,
"learning_rate": 4.715503129848733e-05,
"loss": 1.419,
"step": 11983
},
{
"epoch": 53.36,
"grad_norm": 7.063472747802734,
"learning_rate": 4.69897789919252e-05,
"loss": 1.4188,
"step": 12006
},
{
"epoch": 53.46222222222222,
"grad_norm": 6.49618673324585,
"learning_rate": 4.682455968011731e-05,
"loss": 1.421,
"step": 12029
},
{
"epoch": 53.56444444444445,
"grad_norm": 7.384080410003662,
"learning_rate": 4.6659375174017316e-05,
"loss": 1.4157,
"step": 12052
},
{
"epoch": 53.666666666666664,
"grad_norm": 6.499640464782715,
"learning_rate": 4.6494227284197294e-05,
"loss": 1.3914,
"step": 12075
},
{
"epoch": 53.76888888888889,
"grad_norm": 8.480474472045898,
"learning_rate": 4.632911782082804e-05,
"loss": 1.387,
"step": 12098
},
{
"epoch": 53.87111111111111,
"grad_norm": 7.255825519561768,
"learning_rate": 4.616404859365907e-05,
"loss": 1.4147,
"step": 12121
},
{
"epoch": 53.973333333333336,
"grad_norm": 5.0700249671936035,
"learning_rate": 4.599902141199897e-05,
"loss": 1.389,
"step": 12144
},
{
"epoch": 54.07555555555555,
"grad_norm": 5.912162780761719,
"learning_rate": 4.583403808469542e-05,
"loss": 1.3623,
"step": 12167
},
{
"epoch": 54.17777777777778,
"grad_norm": 5.70848274230957,
"learning_rate": 4.566910042011539e-05,
"loss": 1.3513,
"step": 12190
},
{
"epoch": 54.28,
"grad_norm": 8.14360523223877,
"learning_rate": 4.550421022612542e-05,
"loss": 1.3729,
"step": 12213
},
{
"epoch": 54.382222222222225,
"grad_norm": 5.549880027770996,
"learning_rate": 4.5339369310071654e-05,
"loss": 1.3797,
"step": 12236
},
{
"epoch": 54.48444444444444,
"grad_norm": 6.507516384124756,
"learning_rate": 4.517457947876018e-05,
"loss": 1.3824,
"step": 12259
},
{
"epoch": 54.586666666666666,
"grad_norm": 6.413192272186279,
"learning_rate": 4.500984253843707e-05,
"loss": 1.3718,
"step": 12282
},
{
"epoch": 54.68888888888889,
"grad_norm": 6.168595790863037,
"learning_rate": 4.484516029476873e-05,
"loss": 1.3726,
"step": 12305
},
{
"epoch": 54.791111111111114,
"grad_norm": 6.176178932189941,
"learning_rate": 4.4680534552821996e-05,
"loss": 1.3776,
"step": 12328
},
{
"epoch": 54.89333333333333,
"grad_norm": 6.18988561630249,
"learning_rate": 4.45159671170444e-05,
"loss": 1.3764,
"step": 12351
},
{
"epoch": 54.995555555555555,
"grad_norm": 6.998044490814209,
"learning_rate": 4.4351459791244435e-05,
"loss": 1.375,
"step": 12374
},
{
"epoch": 55.09777777777778,
"grad_norm": 6.069551467895508,
"learning_rate": 4.418701437857166e-05,
"loss": 1.3324,
"step": 12397
},
{
"epoch": 55.2,
"grad_norm": 6.534727096557617,
"learning_rate": 4.402263268149706e-05,
"loss": 1.3301,
"step": 12420
},
{
"epoch": 55.30222222222222,
"grad_norm": 6.363480567932129,
"learning_rate": 4.385831650179322e-05,
"loss": 1.3524,
"step": 12443
},
{
"epoch": 55.404444444444444,
"grad_norm": 6.515593528747559,
"learning_rate": 4.3694067640514614e-05,
"loss": 1.3353,
"step": 12466
},
{
"epoch": 55.50666666666667,
"grad_norm": 6.400863170623779,
"learning_rate": 4.352988789797781e-05,
"loss": 1.3292,
"step": 12489
},
{
"epoch": 55.60888888888889,
"grad_norm": 6.897211074829102,
"learning_rate": 4.336577907374181e-05,
"loss": 1.3591,
"step": 12512
},
{
"epoch": 55.71111111111111,
"grad_norm": 7.05909538269043,
"learning_rate": 4.320174296658827e-05,
"loss": 1.3636,
"step": 12535
},
{
"epoch": 55.81333333333333,
"grad_norm": 5.776651859283447,
"learning_rate": 4.303778137450178e-05,
"loss": 1.3475,
"step": 12558
},
{
"epoch": 55.91555555555556,
"grad_norm": 6.0230021476745605,
"learning_rate": 4.287389609465022e-05,
"loss": 1.3681,
"step": 12581
},
{
"epoch": 56.01777777777778,
"grad_norm": 6.32971715927124,
"learning_rate": 4.271008892336497e-05,
"loss": 1.3458,
"step": 12604
},
{
"epoch": 56.12,
"grad_norm": 8.626049041748047,
"learning_rate": 4.2546361656121346e-05,
"loss": 1.2829,
"step": 12627
},
{
"epoch": 56.22222222222222,
"grad_norm": 6.015228748321533,
"learning_rate": 4.238271608751874e-05,
"loss": 1.2816,
"step": 12650
},
{
"epoch": 56.324444444444445,
"grad_norm": 5.704399108886719,
"learning_rate": 4.221915401126113e-05,
"loss": 1.3026,
"step": 12673
},
{
"epoch": 56.42666666666667,
"grad_norm": 5.911527156829834,
"learning_rate": 4.205567722013733e-05,
"loss": 1.2857,
"step": 12696
},
{
"epoch": 56.528888888888886,
"grad_norm": 6.171534538269043,
"learning_rate": 4.18922875060013e-05,
"loss": 1.2873,
"step": 12719
},
{
"epoch": 56.63111111111111,
"grad_norm": 7.097690105438232,
"learning_rate": 4.1728986659752636e-05,
"loss": 1.3012,
"step": 12742
},
{
"epoch": 56.733333333333334,
"grad_norm": 5.469725608825684,
"learning_rate": 4.156577647131679e-05,
"loss": 1.2895,
"step": 12765
},
{
"epoch": 56.83555555555556,
"grad_norm": 6.386800765991211,
"learning_rate": 4.1402658729625596e-05,
"loss": 1.3026,
"step": 12788
},
{
"epoch": 56.937777777777775,
"grad_norm": 5.86681604385376,
"learning_rate": 4.1239635222597494e-05,
"loss": 1.3072,
"step": 12811
},
{
"epoch": 57.04,
"grad_norm": 6.062530517578125,
"learning_rate": 4.107670773711812e-05,
"loss": 1.284,
"step": 12834
},
{
"epoch": 57.14222222222222,
"grad_norm": 5.922295570373535,
"learning_rate": 4.091387805902058e-05,
"loss": 1.2621,
"step": 12857
},
{
"epoch": 57.24444444444445,
"grad_norm": 5.438425064086914,
"learning_rate": 4.075114797306589e-05,
"loss": 1.264,
"step": 12880
},
{
"epoch": 57.346666666666664,
"grad_norm": 7.964729309082031,
"learning_rate": 4.058851926292353e-05,
"loss": 1.2781,
"step": 12903
},
{
"epoch": 57.44888888888889,
"grad_norm": 6.432003498077393,
"learning_rate": 4.042599371115172e-05,
"loss": 1.2787,
"step": 12926
},
{
"epoch": 57.55111111111111,
"grad_norm": 5.485337257385254,
"learning_rate": 4.026357309917806e-05,
"loss": 1.2663,
"step": 12949
},
{
"epoch": 57.653333333333336,
"grad_norm": 6.874802112579346,
"learning_rate": 4.010125920727982e-05,
"loss": 1.2733,
"step": 12972
},
{
"epoch": 57.75555555555555,
"grad_norm": 5.767955303192139,
"learning_rate": 3.993905381456462e-05,
"loss": 1.2763,
"step": 12995
},
{
"epoch": 57.85777777777778,
"grad_norm": 5.2443389892578125,
"learning_rate": 3.977695869895073e-05,
"loss": 1.273,
"step": 13018
},
{
"epoch": 57.96,
"grad_norm": 7.763814926147461,
"learning_rate": 3.961497563714774e-05,
"loss": 1.2851,
"step": 13041
},
{
"epoch": 58.062222222222225,
"grad_norm": 6.231062412261963,
"learning_rate": 3.945310640463705e-05,
"loss": 1.2581,
"step": 13064
},
{
"epoch": 58.16444444444444,
"grad_norm": 5.801052093505859,
"learning_rate": 3.9291352775652325e-05,
"loss": 1.2376,
"step": 13087
},
{
"epoch": 58.266666666666666,
"grad_norm": 8.022377967834473,
"learning_rate": 3.9129716523160165e-05,
"loss": 1.2403,
"step": 13110
},
{
"epoch": 58.36888888888889,
"grad_norm": 6.449449062347412,
"learning_rate": 3.8968199418840575e-05,
"loss": 1.2353,
"step": 13133
},
{
"epoch": 58.471111111111114,
"grad_norm": 5.934969902038574,
"learning_rate": 3.880680323306765e-05,
"loss": 1.2575,
"step": 13156
},
{
"epoch": 58.57333333333333,
"grad_norm": 6.265482425689697,
"learning_rate": 3.8645529734890014e-05,
"loss": 1.247,
"step": 13179
},
{
"epoch": 58.675555555555555,
"grad_norm": 5.975387096405029,
"learning_rate": 3.8484380692011605e-05,
"loss": 1.2634,
"step": 13202
},
{
"epoch": 58.77777777777778,
"grad_norm": 6.401468753814697,
"learning_rate": 3.83233578707722e-05,
"loss": 1.244,
"step": 13225
},
{
"epoch": 58.88,
"grad_norm": 5.331010341644287,
"learning_rate": 3.816246303612802e-05,
"loss": 1.2459,
"step": 13248
},
{
"epoch": 58.98222222222222,
"grad_norm": 5.550204277038574,
"learning_rate": 3.800169795163252e-05,
"loss": 1.2541,
"step": 13271
},
{
"epoch": 59.08444444444444,
"grad_norm": 5.241280555725098,
"learning_rate": 3.7841064379416903e-05,
"loss": 1.2155,
"step": 13294
},
{
"epoch": 59.18666666666667,
"grad_norm": 6.312388896942139,
"learning_rate": 3.768056408017094e-05,
"loss": 1.2055,
"step": 13317
},
{
"epoch": 59.28888888888889,
"grad_norm": 5.525976181030273,
"learning_rate": 3.752019881312354e-05,
"loss": 1.211,
"step": 13340
},
{
"epoch": 59.39111111111111,
"grad_norm": 6.092748641967773,
"learning_rate": 3.735997033602361e-05,
"loss": 1.2133,
"step": 13363
},
{
"epoch": 59.49333333333333,
"grad_norm": 5.471757888793945,
"learning_rate": 3.719988040512067e-05,
"loss": 1.2267,
"step": 13386
},
{
"epoch": 59.595555555555556,
"grad_norm": 6.422407150268555,
"learning_rate": 3.703993077514563e-05,
"loss": 1.2223,
"step": 13409
},
{
"epoch": 59.69777777777778,
"grad_norm": 5.488748073577881,
"learning_rate": 3.6880123199291635e-05,
"loss": 1.2217,
"step": 13432
},
{
"epoch": 59.8,
"grad_norm": 5.826624393463135,
"learning_rate": 3.672045942919474e-05,
"loss": 1.2216,
"step": 13455
},
{
"epoch": 59.90222222222222,
"grad_norm": 5.7313008308410645,
"learning_rate": 3.656094121491479e-05,
"loss": 1.2271,
"step": 13478
},
{
"epoch": 60.004444444444445,
"grad_norm": 7.073070526123047,
"learning_rate": 3.6401570304916166e-05,
"loss": 1.222,
"step": 13501
},
{
"epoch": 60.10666666666667,
"grad_norm": 5.049999713897705,
"learning_rate": 3.624234844604869e-05,
"loss": 1.1695,
"step": 13524
},
{
"epoch": 60.208888888888886,
"grad_norm": 5.1560211181640625,
"learning_rate": 3.6083277383528466e-05,
"loss": 1.1792,
"step": 13547
},
{
"epoch": 60.31111111111111,
"grad_norm": 5.553138256072998,
"learning_rate": 3.592435886091867e-05,
"loss": 1.1853,
"step": 13570
},
{
"epoch": 60.413333333333334,
"grad_norm": 5.489965438842773,
"learning_rate": 3.576559462011057e-05,
"loss": 1.1918,
"step": 13593
},
{
"epoch": 60.51555555555556,
"grad_norm": 6.636351108551025,
"learning_rate": 3.5606986401304324e-05,
"loss": 1.2002,
"step": 13616
},
{
"epoch": 60.617777777777775,
"grad_norm": 8.49821662902832,
"learning_rate": 3.544853594298997e-05,
"loss": 1.2062,
"step": 13639
},
{
"epoch": 60.72,
"grad_norm": 5.866752624511719,
"learning_rate": 3.529024498192832e-05,
"loss": 1.205,
"step": 13662
},
{
"epoch": 60.82222222222222,
"grad_norm": 12.07309627532959,
"learning_rate": 3.5132115253132005e-05,
"loss": 1.2112,
"step": 13685
},
{
"epoch": 60.92444444444445,
"grad_norm": 7.421104431152344,
"learning_rate": 3.4974148489846315e-05,
"loss": 1.2229,
"step": 13708
},
{
"epoch": 61.026666666666664,
"grad_norm": 5.546532154083252,
"learning_rate": 3.4816346423530385e-05,
"loss": 1.1952,
"step": 13731
},
{
"epoch": 61.12888888888889,
"grad_norm": 5.055679798126221,
"learning_rate": 3.465871078383809e-05,
"loss": 1.1628,
"step": 13754
},
{
"epoch": 61.23111111111111,
"grad_norm": 6.14479923248291,
"learning_rate": 3.4501243298599055e-05,
"loss": 1.1767,
"step": 13777
},
{
"epoch": 61.333333333333336,
"grad_norm": 5.632229328155518,
"learning_rate": 3.434394569379988e-05,
"loss": 1.179,
"step": 13800
},
{
"epoch": 61.43555555555555,
"grad_norm": 5.1467671394348145,
"learning_rate": 3.4186819693565046e-05,
"loss": 1.1745,
"step": 13823
},
{
"epoch": 61.53777777777778,
"grad_norm": 5.162554740905762,
"learning_rate": 3.4029867020138155e-05,
"loss": 1.1672,
"step": 13846
},
{
"epoch": 61.64,
"grad_norm": 5.325419902801514,
"learning_rate": 3.387308939386291e-05,
"loss": 1.1793,
"step": 13869
},
{
"epoch": 61.742222222222225,
"grad_norm": 5.7772626876831055,
"learning_rate": 3.371648853316442e-05,
"loss": 1.1706,
"step": 13892
},
{
"epoch": 61.84444444444444,
"grad_norm": 7.251054763793945,
"learning_rate": 3.356006615453025e-05,
"loss": 1.1572,
"step": 13915
},
{
"epoch": 61.946666666666665,
"grad_norm": 6.169683933258057,
"learning_rate": 3.340382397249159e-05,
"loss": 1.1553,
"step": 13938
},
{
"epoch": 62.04888888888889,
"grad_norm": 6.773545742034912,
"learning_rate": 3.324776369960461e-05,
"loss": 1.1603,
"step": 13961
},
{
"epoch": 62.15111111111111,
"grad_norm": 6.104127407073975,
"learning_rate": 3.309188704643149e-05,
"loss": 1.1209,
"step": 13984
},
{
"epoch": 62.25333333333333,
"grad_norm": 5.433740615844727,
"learning_rate": 3.2936195721521866e-05,
"loss": 1.1373,
"step": 14007
},
{
"epoch": 62.355555555555554,
"grad_norm": 5.472240924835205,
"learning_rate": 3.2780691431393926e-05,
"loss": 1.143,
"step": 14030
},
{
"epoch": 62.45777777777778,
"grad_norm": 5.382284164428711,
"learning_rate": 3.2625375880515854e-05,
"loss": 1.1471,
"step": 14053
},
{
"epoch": 62.56,
"grad_norm": 5.667013168334961,
"learning_rate": 3.2470250771287036e-05,
"loss": 1.1391,
"step": 14076
},
{
"epoch": 62.66222222222222,
"grad_norm": 5.519725322723389,
"learning_rate": 3.231531780401943e-05,
"loss": 1.1335,
"step": 14099
},
{
"epoch": 62.76444444444444,
"grad_norm": 5.530640125274658,
"learning_rate": 3.2160578676919016e-05,
"loss": 1.1386,
"step": 14122
},
{
"epoch": 62.86666666666667,
"grad_norm": 6.683435440063477,
"learning_rate": 3.200603508606703e-05,
"loss": 1.1362,
"step": 14145
},
{
"epoch": 62.96888888888889,
"grad_norm": 5.929420471191406,
"learning_rate": 3.185168872540153e-05,
"loss": 1.1455,
"step": 14168
},
{
"epoch": 63.07111111111111,
"grad_norm": 6.305390357971191,
"learning_rate": 3.169754128669866e-05,
"loss": 1.1242,
"step": 14191
},
{
"epoch": 63.17333333333333,
"grad_norm": 6.4048542976379395,
"learning_rate": 3.154359445955429e-05,
"loss": 1.1263,
"step": 14214
},
{
"epoch": 63.275555555555556,
"grad_norm": 5.409482002258301,
"learning_rate": 3.138984993136535e-05,
"loss": 1.1052,
"step": 14237
},
{
"epoch": 63.37777777777778,
"grad_norm": 5.47636079788208,
"learning_rate": 3.12363093873114e-05,
"loss": 1.1196,
"step": 14260
},
{
"epoch": 63.48,
"grad_norm": 5.092154026031494,
"learning_rate": 3.108297451033616e-05,
"loss": 1.1193,
"step": 14283
},
{
"epoch": 63.58222222222222,
"grad_norm": 5.453930377960205,
"learning_rate": 3.092984698112904e-05,
"loss": 1.1182,
"step": 14306
},
{
"epoch": 63.684444444444445,
"grad_norm": 6.511165618896484,
"learning_rate": 3.0776928478106754e-05,
"loss": 1.1295,
"step": 14329
},
{
"epoch": 63.78666666666667,
"grad_norm": 5.347112655639648,
"learning_rate": 3.062422067739485e-05,
"loss": 1.1239,
"step": 14352
},
{
"epoch": 63.888888888888886,
"grad_norm": 5.500729084014893,
"learning_rate": 3.0471725252809458e-05,
"loss": 1.1227,
"step": 14375
},
{
"epoch": 63.99111111111111,
"grad_norm": 5.913949489593506,
"learning_rate": 3.0319443875838794e-05,
"loss": 1.1306,
"step": 14398
},
{
"epoch": 64.09333333333333,
"grad_norm": 5.112490177154541,
"learning_rate": 3.0167378215624974e-05,
"loss": 1.0993,
"step": 14421
},
{
"epoch": 64.19555555555556,
"grad_norm": 5.541341304779053,
"learning_rate": 3.0015529938945668e-05,
"loss": 1.0976,
"step": 14444
},
{
"epoch": 64.29777777777778,
"grad_norm": 5.937663555145264,
"learning_rate": 2.9863900710195758e-05,
"loss": 1.0953,
"step": 14467
},
{
"epoch": 64.4,
"grad_norm": 5.4565558433532715,
"learning_rate": 2.9712492191369244e-05,
"loss": 1.0998,
"step": 14490
},
{
"epoch": 64.50222222222222,
"grad_norm": 6.276011943817139,
"learning_rate": 2.956130604204089e-05,
"loss": 1.1113,
"step": 14513
},
{
"epoch": 64.60444444444444,
"grad_norm": 5.444122791290283,
"learning_rate": 2.9410343919348127e-05,
"loss": 1.108,
"step": 14536
},
{
"epoch": 64.70666666666666,
"grad_norm": 5.791774749755859,
"learning_rate": 2.9259607477972794e-05,
"loss": 1.1149,
"step": 14559
},
{
"epoch": 64.80888888888889,
"grad_norm": 6.028242588043213,
"learning_rate": 2.9109098370123132e-05,
"loss": 1.1236,
"step": 14582
},
{
"epoch": 64.91111111111111,
"grad_norm": 6.835079193115234,
"learning_rate": 2.8958818245515533e-05,
"loss": 1.1148,
"step": 14605
},
{
"epoch": 65.01333333333334,
"grad_norm": 5.5959792137146,
"learning_rate": 2.8808768751356564e-05,
"loss": 1.1054,
"step": 14628
},
{
"epoch": 65.11555555555556,
"grad_norm": 5.705920219421387,
"learning_rate": 2.865895153232489e-05,
"loss": 1.0824,
"step": 14651
},
{
"epoch": 65.21777777777778,
"grad_norm": 4.9849934577941895,
"learning_rate": 2.8509368230553157e-05,
"loss": 1.077,
"step": 14674
},
{
"epoch": 65.32,
"grad_norm": 5.702665328979492,
"learning_rate": 2.8360020485610163e-05,
"loss": 1.0514,
"step": 14697
},
{
"epoch": 65.42222222222222,
"grad_norm": 5.4493207931518555,
"learning_rate": 2.8210909934482678e-05,
"loss": 1.0653,
"step": 14720
},
{
"epoch": 65.52444444444444,
"grad_norm": 5.684943199157715,
"learning_rate": 2.8062038211557728e-05,
"loss": 1.0641,
"step": 14743
},
{
"epoch": 65.62666666666667,
"grad_norm": 5.757254123687744,
"learning_rate": 2.791340694860446e-05,
"loss": 1.0754,
"step": 14766
},
{
"epoch": 65.72888888888889,
"grad_norm": 5.588274955749512,
"learning_rate": 2.776501777475644e-05,
"loss": 1.0768,
"step": 14789
},
{
"epoch": 65.83111111111111,
"grad_norm": 5.547431945800781,
"learning_rate": 2.7616872316493708e-05,
"loss": 1.078,
"step": 14812
},
{
"epoch": 65.93333333333334,
"grad_norm": 5.201080322265625,
"learning_rate": 2.7468972197624897e-05,
"loss": 1.0824,
"step": 14835
},
{
"epoch": 66.03555555555556,
"grad_norm": 6.8083271980285645,
"learning_rate": 2.7321319039269576e-05,
"loss": 1.07,
"step": 14858
},
{
"epoch": 66.13777777777777,
"grad_norm": 6.262781620025635,
"learning_rate": 2.7173914459840342e-05,
"loss": 1.0395,
"step": 14881
},
{
"epoch": 66.24,
"grad_norm": 5.109470844268799,
"learning_rate": 2.7026760075025192e-05,
"loss": 1.0467,
"step": 14904
},
{
"epoch": 66.34222222222222,
"grad_norm": 5.397584915161133,
"learning_rate": 2.6879857497769712e-05,
"loss": 1.0531,
"step": 14927
},
{
"epoch": 66.44444444444444,
"grad_norm": 5.602553844451904,
"learning_rate": 2.6733208338259486e-05,
"loss": 1.045,
"step": 14950
},
{
"epoch": 66.54666666666667,
"grad_norm": 5.551428318023682,
"learning_rate": 2.6586814203902422e-05,
"loss": 1.042,
"step": 14973
},
{
"epoch": 66.64888888888889,
"grad_norm": 5.80933952331543,
"learning_rate": 2.6440676699311062e-05,
"loss": 1.0555,
"step": 14996
},
{
"epoch": 66.75111111111111,
"grad_norm": 5.058752536773682,
"learning_rate": 2.6294797426285112e-05,
"loss": 1.0507,
"step": 15019
},
{
"epoch": 66.85333333333334,
"grad_norm": 7.067930221557617,
"learning_rate": 2.6149177983793783e-05,
"loss": 1.0599,
"step": 15042
},
{
"epoch": 66.95555555555555,
"grad_norm": 5.901451587677002,
"learning_rate": 2.6003819967958344e-05,
"loss": 1.0527,
"step": 15065
},
{
"epoch": 67.05777777777777,
"grad_norm": 5.727104663848877,
"learning_rate": 2.5858724972034555e-05,
"loss": 1.0395,
"step": 15088
},
{
"epoch": 67.16,
"grad_norm": 7.644411563873291,
"learning_rate": 2.5713894586395283e-05,
"loss": 1.0326,
"step": 15111
},
{
"epoch": 67.26222222222222,
"grad_norm": 4.788581848144531,
"learning_rate": 2.5569330398512957e-05,
"loss": 1.0388,
"step": 15134
},
{
"epoch": 67.36444444444444,
"grad_norm": 4.921880722045898,
"learning_rate": 2.5425033992942316e-05,
"loss": 1.0413,
"step": 15157
},
{
"epoch": 67.46666666666667,
"grad_norm": 5.7385735511779785,
"learning_rate": 2.5281006951302934e-05,
"loss": 1.0328,
"step": 15180
},
{
"epoch": 67.56888888888889,
"grad_norm": 5.9198689460754395,
"learning_rate": 2.5137250852261862e-05,
"loss": 1.0416,
"step": 15203
},
{
"epoch": 67.67111111111112,
"grad_norm": 5.01896858215332,
"learning_rate": 2.499376727151646e-05,
"loss": 1.0455,
"step": 15226
},
{
"epoch": 67.77333333333333,
"grad_norm": 5.580973148345947,
"learning_rate": 2.485055778177696e-05,
"loss": 1.0487,
"step": 15249
},
{
"epoch": 67.87555555555555,
"grad_norm": 4.777526378631592,
"learning_rate": 2.470762395274938e-05,
"loss": 1.0434,
"step": 15272
},
{
"epoch": 67.97777777777777,
"grad_norm": 7.526794910430908,
"learning_rate": 2.4564967351118175e-05,
"loss": 1.0477,
"step": 15295
},
{
"epoch": 68.08,
"grad_norm": 6.90614128112793,
"learning_rate": 2.4422589540529185e-05,
"loss": 1.0341,
"step": 15318
},
{
"epoch": 68.18222222222222,
"grad_norm": 6.120336532592773,
"learning_rate": 2.4280492081572455e-05,
"loss": 1.0169,
"step": 15341
},
{
"epoch": 68.28444444444445,
"grad_norm": 5.239770889282227,
"learning_rate": 2.413867653176506e-05,
"loss": 1.0155,
"step": 15364
},
{
"epoch": 68.38666666666667,
"grad_norm": 5.342464923858643,
"learning_rate": 2.3997144445534175e-05,
"loss": 1.0343,
"step": 15387
},
{
"epoch": 68.4888888888889,
"grad_norm": 6.170787811279297,
"learning_rate": 2.3855897374199883e-05,
"loss": 1.0101,
"step": 15410
},
{
"epoch": 68.5911111111111,
"grad_norm": 7.313038349151611,
"learning_rate": 2.371493686595831e-05,
"loss": 1.0369,
"step": 15433
},
{
"epoch": 68.69333333333333,
"grad_norm": 5.434996604919434,
"learning_rate": 2.3574264465864527e-05,
"loss": 1.0345,
"step": 15456
},
{
"epoch": 68.79555555555555,
"grad_norm": 6.723358631134033,
"learning_rate": 2.343388171581573e-05,
"loss": 1.0309,
"step": 15479
},
{
"epoch": 68.89777777777778,
"grad_norm": 5.317188262939453,
"learning_rate": 2.3293790154534283e-05,
"loss": 1.0314,
"step": 15502
},
{
"epoch": 69.0,
"grad_norm": 6.149099349975586,
"learning_rate": 2.315399131755081e-05,
"loss": 1.0313,
"step": 15525
},
{
"epoch": 69.10222222222222,
"grad_norm": 5.885578632354736,
"learning_rate": 2.3014486737187475e-05,
"loss": 1.0127,
"step": 15548
},
{
"epoch": 69.20444444444445,
"grad_norm": 5.442347049713135,
"learning_rate": 2.2875277942541057e-05,
"loss": 1.0002,
"step": 15571
},
{
"epoch": 69.30666666666667,
"grad_norm": 5.002798080444336,
"learning_rate": 2.2736366459466326e-05,
"loss": 1.0208,
"step": 15594
},
{
"epoch": 69.4088888888889,
"grad_norm": 4.764693737030029,
"learning_rate": 2.259775381055917e-05,
"loss": 1.0147,
"step": 15617
},
{
"epoch": 69.5111111111111,
"grad_norm": 5.556255340576172,
"learning_rate": 2.2459441515140044e-05,
"loss": 0.9888,
"step": 15640
},
{
"epoch": 69.61333333333333,
"grad_norm": 5.241755485534668,
"learning_rate": 2.2321431089237256e-05,
"loss": 0.9846,
"step": 15663
},
{
"epoch": 69.71555555555555,
"grad_norm": 5.701202869415283,
"learning_rate": 2.2183724045570286e-05,
"loss": 0.9872,
"step": 15686
},
{
"epoch": 69.81777777777778,
"grad_norm": 8.224358558654785,
"learning_rate": 2.2046321893533362e-05,
"loss": 0.9898,
"step": 15709
},
{
"epoch": 69.92,
"grad_norm": 5.965829849243164,
"learning_rate": 2.1909226139178723e-05,
"loss": 0.9831,
"step": 15732
},
{
"epoch": 70.02222222222223,
"grad_norm": 5.391206741333008,
"learning_rate": 2.1772438285200312e-05,
"loss": 0.9954,
"step": 15755
},
{
"epoch": 70.12444444444445,
"grad_norm": 6.74372673034668,
"learning_rate": 2.1635959830917107e-05,
"loss": 0.9651,
"step": 15778
},
{
"epoch": 70.22666666666667,
"grad_norm": 5.2756123542785645,
"learning_rate": 2.149979227225688e-05,
"loss": 0.9698,
"step": 15801
},
{
"epoch": 70.32888888888888,
"grad_norm": 6.822518825531006,
"learning_rate": 2.1363937101739613e-05,
"loss": 0.9771,
"step": 15824
},
{
"epoch": 70.43111111111111,
"grad_norm": 5.256137847900391,
"learning_rate": 2.1228395808461294e-05,
"loss": 0.9962,
"step": 15847
},
{
"epoch": 70.53333333333333,
"grad_norm": 4.483437538146973,
"learning_rate": 2.1093169878077533e-05,
"loss": 0.9735,
"step": 15870
},
{
"epoch": 70.63555555555556,
"grad_norm": 6.114633083343506,
"learning_rate": 2.0958260792787215e-05,
"loss": 0.9839,
"step": 15893
},
{
"epoch": 70.73777777777778,
"grad_norm": 5.309250831604004,
"learning_rate": 2.08236700313164e-05,
"loss": 0.9745,
"step": 15916
},
{
"epoch": 70.84,
"grad_norm": 5.820844650268555,
"learning_rate": 2.068939906890194e-05,
"loss": 0.9786,
"step": 15939
},
{
"epoch": 70.94222222222223,
"grad_norm": 5.038022041320801,
"learning_rate": 2.055544937727549e-05,
"loss": 0.9912,
"step": 15962
},
{
"epoch": 71.04444444444445,
"grad_norm": 5.100025177001953,
"learning_rate": 2.042182242464719e-05,
"loss": 0.9748,
"step": 15985
},
{
"epoch": 71.14666666666666,
"grad_norm": 5.8269829750061035,
"learning_rate": 2.0288519675689755e-05,
"loss": 0.9614,
"step": 16008
},
{
"epoch": 71.24888888888889,
"grad_norm": 5.484350681304932,
"learning_rate": 2.0155542591522303e-05,
"loss": 0.9655,
"step": 16031
},
{
"epoch": 71.35111111111111,
"grad_norm": 5.463179111480713,
"learning_rate": 2.0022892629694335e-05,
"loss": 0.9633,
"step": 16054
},
{
"epoch": 71.45333333333333,
"grad_norm": 6.4749579429626465,
"learning_rate": 1.9890571244169854e-05,
"loss": 0.9643,
"step": 16077
},
{
"epoch": 71.55555555555556,
"grad_norm": 5.12134313583374,
"learning_rate": 1.97585798853113e-05,
"loss": 0.9771,
"step": 16100
},
{
"epoch": 71.65777777777778,
"grad_norm": 5.494293212890625,
"learning_rate": 1.9626919999863802e-05,
"loss": 0.9833,
"step": 16123
},
{
"epoch": 71.76,
"grad_norm": 6.645090579986572,
"learning_rate": 1.9495593030939157e-05,
"loss": 0.966,
"step": 16146
},
{
"epoch": 71.86222222222223,
"grad_norm": 5.469064235687256,
"learning_rate": 1.9364600418000156e-05,
"loss": 0.9752,
"step": 16169
},
{
"epoch": 71.96444444444444,
"grad_norm": 7.400743007659912,
"learning_rate": 1.9233943596844734e-05,
"loss": 0.9729,
"step": 16192
},
{
"epoch": 72.06666666666666,
"grad_norm": 5.228180408477783,
"learning_rate": 1.9103623999590202e-05,
"loss": 0.9706,
"step": 16215
},
{
"epoch": 72.16888888888889,
"grad_norm": 5.571268081665039,
"learning_rate": 1.897364305465766e-05,
"loss": 0.9544,
"step": 16238
},
{
"epoch": 72.27111111111111,
"grad_norm": 5.692650318145752,
"learning_rate": 1.884400218675619e-05,
"loss": 0.9577,
"step": 16261
},
{
"epoch": 72.37333333333333,
"grad_norm": 5.098461151123047,
"learning_rate": 1.87147028168674e-05,
"loss": 0.952,
"step": 16284
},
{
"epoch": 72.47555555555556,
"grad_norm": 5.3133745193481445,
"learning_rate": 1.8585746362229706e-05,
"loss": 0.9623,
"step": 16307
},
{
"epoch": 72.57777777777778,
"grad_norm": 5.299659729003906,
"learning_rate": 1.8457134236322903e-05,
"loss": 0.9505,
"step": 16330
},
{
"epoch": 72.68,
"grad_norm": 6.57431173324585,
"learning_rate": 1.832886784885263e-05,
"loss": 0.9665,
"step": 16353
},
{
"epoch": 72.78222222222222,
"grad_norm": 5.018616199493408,
"learning_rate": 1.820094860573488e-05,
"loss": 0.9565,
"step": 16376
},
{
"epoch": 72.88444444444444,
"grad_norm": 5.487111568450928,
"learning_rate": 1.8073377909080685e-05,
"loss": 0.9551,
"step": 16399
},
{
"epoch": 72.98666666666666,
"grad_norm": 6.0984086990356445,
"learning_rate": 1.7946157157180628e-05,
"loss": 0.9743,
"step": 16422
},
{
"epoch": 73.08888888888889,
"grad_norm": 5.412441730499268,
"learning_rate": 1.7819287744489636e-05,
"loss": 0.9316,
"step": 16445
},
{
"epoch": 73.19111111111111,
"grad_norm": 5.8434929847717285,
"learning_rate": 1.7692771061611603e-05,
"loss": 0.947,
"step": 16468
},
{
"epoch": 73.29333333333334,
"grad_norm": 5.178957462310791,
"learning_rate": 1.756660849528422e-05,
"loss": 0.9455,
"step": 16491
},
{
"epoch": 73.39555555555556,
"grad_norm": 6.5831499099731445,
"learning_rate": 1.7440801428363677e-05,
"loss": 0.9469,
"step": 16514
},
{
"epoch": 73.49777777777778,
"grad_norm": 5.628024101257324,
"learning_rate": 1.731535123980964e-05,
"loss": 0.961,
"step": 16537
},
{
"epoch": 73.6,
"grad_norm": 4.770416736602783,
"learning_rate": 1.7190259304670038e-05,
"loss": 0.9489,
"step": 16560
},
{
"epoch": 73.70222222222222,
"grad_norm": 5.419926166534424,
"learning_rate": 1.7065526994065973e-05,
"loss": 0.9384,
"step": 16583
},
{
"epoch": 73.80444444444444,
"grad_norm": 5.695985794067383,
"learning_rate": 1.6941155675176823e-05,
"loss": 0.9386,
"step": 16606
},
{
"epoch": 73.90666666666667,
"grad_norm": 5.251271724700928,
"learning_rate": 1.6817146711225073e-05,
"loss": 0.9577,
"step": 16629
},
{
"epoch": 74.00888888888889,
"grad_norm": 5.220533847808838,
"learning_rate": 1.669350146146156e-05,
"loss": 0.9513,
"step": 16652
},
{
"epoch": 74.11111111111111,
"grad_norm": 5.326650142669678,
"learning_rate": 1.65702212811504e-05,
"loss": 0.9399,
"step": 16675
},
{
"epoch": 74.21333333333334,
"grad_norm": 5.140909194946289,
"learning_rate": 1.6447307521554273e-05,
"loss": 0.9273,
"step": 16698
},
{
"epoch": 74.31555555555556,
"grad_norm": 5.344797611236572,
"learning_rate": 1.6324761529919556e-05,
"loss": 0.942,
"step": 16721
},
{
"epoch": 74.41777777777777,
"grad_norm": 5.0787835121154785,
"learning_rate": 1.6202584649461505e-05,
"loss": 0.9358,
"step": 16744
},
{
"epoch": 74.52,
"grad_norm": 4.678197383880615,
"learning_rate": 1.608077821934965e-05,
"loss": 0.9313,
"step": 16767
},
{
"epoch": 74.62222222222222,
"grad_norm": 5.813838005065918,
"learning_rate": 1.5959343574692982e-05,
"loss": 0.9375,
"step": 16790
},
{
"epoch": 74.72444444444444,
"grad_norm": 7.276843070983887,
"learning_rate": 1.5838282046525444e-05,
"loss": 0.9359,
"step": 16813
},
{
"epoch": 74.82666666666667,
"grad_norm": 5.635644435882568,
"learning_rate": 1.571759496179123e-05,
"loss": 0.9444,
"step": 16836
},
{
"epoch": 74.92888888888889,
"grad_norm": 5.5287556648254395,
"learning_rate": 1.5597283643330347e-05,
"loss": 0.9345,
"step": 16859
},
{
"epoch": 75.03111111111112,
"grad_norm": 5.956721782684326,
"learning_rate": 1.547734940986404e-05,
"loss": 0.9618,
"step": 16882
},
{
"epoch": 75.13333333333334,
"grad_norm": 6.450102806091309,
"learning_rate": 1.535779357598033e-05,
"loss": 0.9266,
"step": 16905
},
{
"epoch": 75.23555555555555,
"grad_norm": 5.966337203979492,
"learning_rate": 1.5238617452119697e-05,
"loss": 0.9089,
"step": 16928
},
{
"epoch": 75.33777777777777,
"grad_norm": 5.400455474853516,
"learning_rate": 1.5119822344560591e-05,
"loss": 0.8967,
"step": 16951
},
{
"epoch": 75.44,
"grad_norm": 5.6878180503845215,
"learning_rate": 1.5001409555405238e-05,
"loss": 0.9058,
"step": 16974
},
{
"epoch": 75.54222222222222,
"grad_norm": 5.092850685119629,
"learning_rate": 1.4883380382565244e-05,
"loss": 0.9037,
"step": 16997
},
{
"epoch": 75.64444444444445,
"grad_norm": 7.444413185119629,
"learning_rate": 1.4765736119747475e-05,
"loss": 0.9191,
"step": 17020
},
{
"epoch": 75.74666666666667,
"grad_norm": 5.114320755004883,
"learning_rate": 1.4648478056439847e-05,
"loss": 0.9132,
"step": 17043
},
{
"epoch": 75.8488888888889,
"grad_norm": 5.615855693817139,
"learning_rate": 1.453160747789712e-05,
"loss": 0.9064,
"step": 17066
},
{
"epoch": 75.95111111111112,
"grad_norm": 5.120584964752197,
"learning_rate": 1.4415125665126933e-05,
"loss": 0.9149,
"step": 17089
},
{
"epoch": 76.05333333333333,
"grad_norm": 5.242557048797607,
"learning_rate": 1.4299033894875647e-05,
"loss": 0.8938,
"step": 17112
},
{
"epoch": 76.15555555555555,
"grad_norm": 5.4338297843933105,
"learning_rate": 1.4183333439614449e-05,
"loss": 0.8982,
"step": 17135
},
{
"epoch": 76.25777777777778,
"grad_norm": 4.58558988571167,
"learning_rate": 1.4068025567525317e-05,
"loss": 0.8992,
"step": 17158
},
{
"epoch": 76.36,
"grad_norm": 5.754461765289307,
"learning_rate": 1.3953111542487202e-05,
"loss": 0.91,
"step": 17181
},
{
"epoch": 76.46222222222222,
"grad_norm": 4.953834533691406,
"learning_rate": 1.383859262406208e-05,
"loss": 0.9014,
"step": 17204
},
{
"epoch": 76.56444444444445,
"grad_norm": 5.375875473022461,
"learning_rate": 1.3724470067481255e-05,
"loss": 0.9027,
"step": 17227
},
{
"epoch": 76.66666666666667,
"grad_norm": 5.019064426422119,
"learning_rate": 1.3610745123631535e-05,
"loss": 0.8902,
"step": 17250
},
{
"epoch": 76.7688888888889,
"grad_norm": 7.214736461639404,
"learning_rate": 1.3497419039041488e-05,
"loss": 0.9004,
"step": 17273
},
{
"epoch": 76.8711111111111,
"grad_norm": 5.181694507598877,
"learning_rate": 1.3384493055867885e-05,
"loss": 0.8949,
"step": 17296
},
{
"epoch": 76.97333333333333,
"grad_norm": 5.116537094116211,
"learning_rate": 1.3271968411881963e-05,
"loss": 0.8958,
"step": 17319
},
{
"epoch": 77.07555555555555,
"grad_norm": 4.765411853790283,
"learning_rate": 1.3159846340455967e-05,
"loss": 0.8901,
"step": 17342
},
{
"epoch": 77.17777777777778,
"grad_norm": 4.765920639038086,
"learning_rate": 1.3048128070549543e-05,
"loss": 0.8875,
"step": 17365
},
{
"epoch": 77.28,
"grad_norm": 4.69777250289917,
"learning_rate": 1.2936814826696324e-05,
"loss": 0.881,
"step": 17388
},
{
"epoch": 77.38222222222223,
"grad_norm": 4.7684550285339355,
"learning_rate": 1.2825907828990518e-05,
"loss": 0.8835,
"step": 17411
},
{
"epoch": 77.48444444444445,
"grad_norm": 4.776817321777344,
"learning_rate": 1.271540829307344e-05,
"loss": 0.8896,
"step": 17434
},
{
"epoch": 77.58666666666667,
"grad_norm": 4.983736038208008,
"learning_rate": 1.2605317430120311e-05,
"loss": 0.8845,
"step": 17457
},
{
"epoch": 77.68888888888888,
"grad_norm": 5.313802719116211,
"learning_rate": 1.2495636446826891e-05,
"loss": 0.8922,
"step": 17480
},
{
"epoch": 77.7911111111111,
"grad_norm": 4.997971534729004,
"learning_rate": 1.2386366545396328e-05,
"loss": 0.8856,
"step": 17503
},
{
"epoch": 77.89333333333333,
"grad_norm": 5.876720905303955,
"learning_rate": 1.2277508923525876e-05,
"loss": 0.8838,
"step": 17526
},
{
"epoch": 77.99555555555555,
"grad_norm": 4.762071132659912,
"learning_rate": 1.216906477439389e-05,
"loss": 0.8814,
"step": 17549
},
{
"epoch": 78.09777777777778,
"grad_norm": 4.621342658996582,
"learning_rate": 1.2061035286646677e-05,
"loss": 0.8764,
"step": 17572
},
{
"epoch": 78.2,
"grad_norm": 5.084928035736084,
"learning_rate": 1.1953421644385443e-05,
"loss": 0.8747,
"step": 17595
},
{
"epoch": 78.30222222222223,
"grad_norm": 4.952382564544678,
"learning_rate": 1.1846225027153401e-05,
"loss": 0.8886,
"step": 17618
},
{
"epoch": 78.40444444444445,
"grad_norm": 4.579256534576416,
"learning_rate": 1.1739446609922739e-05,
"loss": 0.8729,
"step": 17641
},
{
"epoch": 78.50666666666666,
"grad_norm": 5.518742561340332,
"learning_rate": 1.1633087563081847e-05,
"loss": 0.8863,
"step": 17664
},
{
"epoch": 78.60888888888888,
"grad_norm": 4.966059684753418,
"learning_rate": 1.1527149052422382e-05,
"loss": 0.8839,
"step": 17687
},
{
"epoch": 78.71111111111111,
"grad_norm": 5.001364707946777,
"learning_rate": 1.1421632239126578e-05,
"loss": 0.8893,
"step": 17710
},
{
"epoch": 78.81333333333333,
"grad_norm": 4.7873854637146,
"learning_rate": 1.131653827975449e-05,
"loss": 0.8695,
"step": 17733
},
{
"epoch": 78.91555555555556,
"grad_norm": 5.2424516677856445,
"learning_rate": 1.1211868326231273e-05,
"loss": 0.8857,
"step": 17756
},
{
"epoch": 79.01777777777778,
"grad_norm": 4.72099494934082,
"learning_rate": 1.1107623525834631e-05,
"loss": 0.8844,
"step": 17779
},
{
"epoch": 79.12,
"grad_norm": 5.387650489807129,
"learning_rate": 1.1003805021182168e-05,
"loss": 0.8672,
"step": 17802
},
{
"epoch": 79.22222222222223,
"grad_norm": 6.549093246459961,
"learning_rate": 1.0900413950218947e-05,
"loss": 0.8639,
"step": 17825
},
{
"epoch": 79.32444444444444,
"grad_norm": 5.805511951446533,
"learning_rate": 1.0797451446204904e-05,
"loss": 0.8738,
"step": 17848
},
{
"epoch": 79.42666666666666,
"grad_norm": 5.417078018188477,
"learning_rate": 1.0694918637702562e-05,
"loss": 0.8815,
"step": 17871
},
{
"epoch": 79.52888888888889,
"grad_norm": 4.696217060089111,
"learning_rate": 1.0592816648564535e-05,
"loss": 0.8824,
"step": 17894
},
{
"epoch": 79.63111111111111,
"grad_norm": 4.98297119140625,
"learning_rate": 1.0491146597921309e-05,
"loss": 0.8617,
"step": 17917
},
{
"epoch": 79.73333333333333,
"grad_norm": 4.85457181930542,
"learning_rate": 1.0389909600168911e-05,
"loss": 0.8715,
"step": 17940
},
{
"epoch": 79.83555555555556,
"grad_norm": 5.266817092895508,
"learning_rate": 1.0289106764956702e-05,
"loss": 0.8754,
"step": 17963
},
{
"epoch": 79.93777777777778,
"grad_norm": 5.948962688446045,
"learning_rate": 1.0188739197175268e-05,
"loss": 0.8806,
"step": 17986
},
{
"epoch": 80.04,
"grad_norm": 6.155448913574219,
"learning_rate": 1.0088807996944211e-05,
"loss": 0.8767,
"step": 18009
},
{
"epoch": 80.14222222222222,
"grad_norm": 4.785376071929932,
"learning_rate": 9.989314259600219e-06,
"loss": 0.8719,
"step": 18032
},
{
"epoch": 80.24444444444444,
"grad_norm": 4.980493545532227,
"learning_rate": 9.890259075684915e-06,
"loss": 0.866,
"step": 18055
},
{
"epoch": 80.34666666666666,
"grad_norm": 6.7485032081604,
"learning_rate": 9.791643530933032e-06,
"loss": 0.8639,
"step": 18078
},
{
"epoch": 80.44888888888889,
"grad_norm": 5.030679225921631,
"learning_rate": 9.693468706260456e-06,
"loss": 0.8707,
"step": 18101
},
{
"epoch": 80.55111111111111,
"grad_norm": 5.043888568878174,
"learning_rate": 9.595735677752343e-06,
"loss": 0.8603,
"step": 18124
},
{
"epoch": 80.65333333333334,
"grad_norm": 5.022198677062988,
"learning_rate": 9.49844551665141e-06,
"loss": 0.8598,
"step": 18147
},
{
"epoch": 80.75555555555556,
"grad_norm": 6.346147060394287,
"learning_rate": 9.401599289346091e-06,
"loss": 0.8663,
"step": 18170
},
{
"epoch": 80.85777777777778,
"grad_norm": 5.1296610832214355,
"learning_rate": 9.305198057358972e-06,
"loss": 0.8703,
"step": 18193
},
{
"epoch": 80.96,
"grad_norm": 5.117784023284912,
"learning_rate": 9.209242877335005e-06,
"loss": 0.8624,
"step": 18216
},
{
"epoch": 81.06222222222222,
"grad_norm": 4.949360370635986,
"learning_rate": 9.113734801030076e-06,
"loss": 0.8559,
"step": 18239
},
{
"epoch": 81.16444444444444,
"grad_norm": 4.507094860076904,
"learning_rate": 9.018674875299393e-06,
"loss": 0.861,
"step": 18262
},
{
"epoch": 81.26666666666667,
"grad_norm": 5.280154705047607,
"learning_rate": 8.924064142085985e-06,
"loss": 0.8558,
"step": 18285
},
{
"epoch": 81.36888888888889,
"grad_norm": 4.777374267578125,
"learning_rate": 8.829903638409388e-06,
"loss": 0.8598,
"step": 18308
},
{
"epoch": 81.47111111111111,
"grad_norm": 5.726168632507324,
"learning_rate": 8.736194396354153e-06,
"loss": 0.8649,
"step": 18331
},
{
"epoch": 81.57333333333334,
"grad_norm": 5.1066484451293945,
"learning_rate": 8.642937443058646e-06,
"loss": 0.8558,
"step": 18354
},
{
"epoch": 81.67555555555556,
"grad_norm": 5.291098117828369,
"learning_rate": 8.550133800703686e-06,
"loss": 0.8572,
"step": 18377
},
{
"epoch": 81.77777777777777,
"grad_norm": 4.3951334953308105,
"learning_rate": 8.457784486501452e-06,
"loss": 0.8713,
"step": 18400
},
{
"epoch": 81.88,
"grad_norm": 4.807311058044434,
"learning_rate": 8.36589051268421e-06,
"loss": 0.8704,
"step": 18423
},
{
"epoch": 81.98222222222222,
"grad_norm": 6.832765579223633,
"learning_rate": 8.274452886493333e-06,
"loss": 0.862,
"step": 18446
},
{
"epoch": 82.08444444444444,
"grad_norm": 4.566845417022705,
"learning_rate": 8.183472610168197e-06,
"loss": 0.8604,
"step": 18469
},
{
"epoch": 82.18666666666667,
"grad_norm": 4.8708648681640625,
"learning_rate": 8.092950680935185e-06,
"loss": 0.8589,
"step": 18492
},
{
"epoch": 82.28888888888889,
"grad_norm": 5.396876335144043,
"learning_rate": 8.002888090996814e-06,
"loss": 0.8608,
"step": 18515
},
{
"epoch": 82.39111111111112,
"grad_norm": 4.885883808135986,
"learning_rate": 7.913285827520794e-06,
"loss": 0.8484,
"step": 18538
},
{
"epoch": 82.49333333333334,
"grad_norm": 4.598787307739258,
"learning_rate": 7.824144872629269e-06,
"loss": 0.8576,
"step": 18561
},
{
"epoch": 82.59555555555555,
"grad_norm": 4.590323448181152,
"learning_rate": 7.735466203387992e-06,
"loss": 0.8554,
"step": 18584
},
{
"epoch": 82.69777777777777,
"grad_norm": 5.497690200805664,
"learning_rate": 7.647250791795668e-06,
"loss": 0.855,
"step": 18607
},
{
"epoch": 82.8,
"grad_norm": 4.905009746551514,
"learning_rate": 7.559499604773279e-06,
"loss": 0.8563,
"step": 18630
},
{
"epoch": 82.90222222222222,
"grad_norm": 4.675111770629883,
"learning_rate": 7.47221360415346e-06,
"loss": 0.8597,
"step": 18653
},
{
"epoch": 83.00444444444445,
"grad_norm": 5.6808576583862305,
"learning_rate": 7.385393746670022e-06,
"loss": 0.8566,
"step": 18676
},
{
"epoch": 83.10666666666667,
"grad_norm": 6.699379920959473,
"learning_rate": 7.299040983947369e-06,
"loss": 0.856,
"step": 18699
},
{
"epoch": 83.2088888888889,
"grad_norm": 5.053982257843018,
"learning_rate": 7.213156262490173e-06,
"loss": 0.8481,
"step": 18722
},
{
"epoch": 83.31111111111112,
"grad_norm": 5.297053337097168,
"learning_rate": 7.127740523672915e-06,
"loss": 0.85,
"step": 18745
},
{
"epoch": 83.41333333333333,
"grad_norm": 5.744291305541992,
"learning_rate": 7.042794703729622e-06,
"loss": 0.8618,
"step": 18768
},
{
"epoch": 83.51555555555555,
"grad_norm": 4.679412364959717,
"learning_rate": 6.95831973374359e-06,
"loss": 0.8403,
"step": 18791
},
{
"epoch": 83.61777777777777,
"grad_norm": 4.38852596282959,
"learning_rate": 6.874316539637127e-06,
"loss": 0.8464,
"step": 18814
},
{
"epoch": 83.72,
"grad_norm": 4.899384021759033,
"learning_rate": 6.7907860421615066e-06,
"loss": 0.8523,
"step": 18837
},
{
"epoch": 83.82222222222222,
"grad_norm": 5.16193962097168,
"learning_rate": 6.707729156886777e-06,
"loss": 0.8502,
"step": 18860
},
{
"epoch": 83.92444444444445,
"grad_norm": 4.833446979522705,
"learning_rate": 6.625146794191794e-06,
"loss": 0.8551,
"step": 18883
},
{
"epoch": 84.02666666666667,
"grad_norm": 4.920324325561523,
"learning_rate": 6.543039859254185e-06,
"loss": 0.8525,
"step": 18906
},
{
"epoch": 84.1288888888889,
"grad_norm": 5.322509765625,
"learning_rate": 6.4614092520404905e-06,
"loss": 0.8534,
"step": 18929
},
{
"epoch": 84.2311111111111,
"grad_norm": 5.062963485717773,
"learning_rate": 6.380255867296253e-06,
"loss": 0.8519,
"step": 18952
},
{
"epoch": 84.33333333333333,
"grad_norm": 5.186446666717529,
"learning_rate": 6.299580594536214e-06,
"loss": 0.8445,
"step": 18975
},
{
"epoch": 84.43555555555555,
"grad_norm": 5.609063148498535,
"learning_rate": 6.219384318034588e-06,
"loss": 0.8432,
"step": 18998
},
{
"epoch": 84.53777777777778,
"grad_norm": 4.684319972991943,
"learning_rate": 6.1396679168153445e-06,
"loss": 0.8434,
"step": 19021
},
{
"epoch": 84.64,
"grad_norm": 4.717188835144043,
"learning_rate": 6.060432264642601e-06,
"loss": 0.8451,
"step": 19044
},
{
"epoch": 84.74222222222222,
"grad_norm": 6.810020446777344,
"learning_rate": 5.981678230011006e-06,
"loss": 0.8425,
"step": 19067
},
{
"epoch": 84.84444444444445,
"grad_norm": 4.562713146209717,
"learning_rate": 5.903406676136264e-06,
"loss": 0.8468,
"step": 19090
},
{
"epoch": 84.94666666666667,
"grad_norm": 5.388665199279785,
"learning_rate": 5.825618460945636e-06,
"loss": 0.8418,
"step": 19113
},
{
"epoch": 85.04888888888888,
"grad_norm": 5.054759979248047,
"learning_rate": 5.748314437068558e-06,
"loss": 0.8417,
"step": 19136
},
{
"epoch": 85.1511111111111,
"grad_norm": 4.943572521209717,
"learning_rate": 5.671495451827308e-06,
"loss": 0.8444,
"step": 19159
},
{
"epoch": 85.25333333333333,
"grad_norm": 4.801841735839844,
"learning_rate": 5.595162347227661e-06,
"loss": 0.8407,
"step": 19182
},
{
"epoch": 85.35555555555555,
"grad_norm": 4.94541072845459,
"learning_rate": 5.519315959949745e-06,
"loss": 0.8413,
"step": 19205
},
{
"epoch": 85.45777777777778,
"grad_norm": 5.529304027557373,
"learning_rate": 5.443957121338777e-06,
"loss": 0.8462,
"step": 19228
},
{
"epoch": 85.56,
"grad_norm": 4.735396385192871,
"learning_rate": 5.36908665739605e-06,
"loss": 0.8491,
"step": 19251
},
{
"epoch": 85.66222222222223,
"grad_norm": 5.091115474700928,
"learning_rate": 5.294705388769772e-06,
"loss": 0.8444,
"step": 19274
},
{
"epoch": 85.76444444444445,
"grad_norm": 4.820996284484863,
"learning_rate": 5.220814130746165e-06,
"loss": 0.8509,
"step": 19297
},
{
"epoch": 85.86666666666666,
"grad_norm": 4.448352336883545,
"learning_rate": 5.1474136932404935e-06,
"loss": 0.8339,
"step": 19320
},
{
"epoch": 85.96888888888888,
"grad_norm": 4.6064019203186035,
"learning_rate": 5.07450488078815e-06,
"loss": 0.8115,
"step": 19343
},
{
"epoch": 86.07111111111111,
"grad_norm": 6.598939895629883,
"learning_rate": 5.002088492535906e-06,
"loss": 0.818,
"step": 19366
},
{
"epoch": 86.17333333333333,
"grad_norm": 4.426856994628906,
"learning_rate": 4.930165322233082e-06,
"loss": 0.8147,
"step": 19389
},
{
"epoch": 86.27555555555556,
"grad_norm": 4.873010635375977,
"learning_rate": 4.858736158222921e-06,
"loss": 0.8146,
"step": 19412
},
{
"epoch": 86.37777777777778,
"grad_norm": 4.8856520652771,
"learning_rate": 4.787801783433871e-06,
"loss": 0.8158,
"step": 19435
},
{
"epoch": 86.48,
"grad_norm": 5.177906513214111,
"learning_rate": 4.717362975371059e-06,
"loss": 0.8187,
"step": 19458
},
{
"epoch": 86.58222222222223,
"grad_norm": 4.954709529876709,
"learning_rate": 4.647420506107775e-06,
"loss": 0.8131,
"step": 19481
},
{
"epoch": 86.68444444444444,
"grad_norm": 4.427014350891113,
"learning_rate": 4.577975142276925e-06,
"loss": 0.8263,
"step": 19504
},
{
"epoch": 86.78666666666666,
"grad_norm": 5.581162929534912,
"learning_rate": 4.509027645062758e-06,
"loss": 0.8201,
"step": 19527
},
{
"epoch": 86.88888888888889,
"grad_norm": 4.889328479766846,
"learning_rate": 4.4405787701923885e-06,
"loss": 0.8239,
"step": 19550
},
{
"epoch": 86.99111111111111,
"grad_norm": 4.658565998077393,
"learning_rate": 4.3726292679276305e-06,
"loss": 0.8211,
"step": 19573
},
{
"epoch": 87.09333333333333,
"grad_norm": 5.102555751800537,
"learning_rate": 4.305179883056687e-06,
"loss": 0.8154,
"step": 19596
},
{
"epoch": 87.19555555555556,
"grad_norm": 4.951329231262207,
"learning_rate": 4.23823135488603e-06,
"loss": 0.8182,
"step": 19619
},
{
"epoch": 87.29777777777778,
"grad_norm": 5.642242908477783,
"learning_rate": 4.171784417232305e-06,
"loss": 0.8076,
"step": 19642
},
{
"epoch": 87.4,
"grad_norm": 5.003154277801514,
"learning_rate": 4.10583979841424e-06,
"loss": 0.8129,
"step": 19665
},
{
"epoch": 87.50222222222222,
"grad_norm": 5.778168678283691,
"learning_rate": 4.040398221244718e-06,
"loss": 0.8123,
"step": 19688
},
{
"epoch": 87.60444444444444,
"grad_norm": 5.08914852142334,
"learning_rate": 3.975460403022801e-06,
"loss": 0.8149,
"step": 19711
},
{
"epoch": 87.70666666666666,
"grad_norm": 4.585403919219971,
"learning_rate": 3.9110270555259345e-06,
"loss": 0.8197,
"step": 19734
},
{
"epoch": 87.80888888888889,
"grad_norm": 4.91745138168335,
"learning_rate": 3.84709888500207e-06,
"loss": 0.8175,
"step": 19757
},
{
"epoch": 87.91111111111111,
"grad_norm": 5.540400981903076,
"learning_rate": 3.7836765921619888e-06,
"loss": 0.8115,
"step": 19780
},
{
"epoch": 88.01333333333334,
"grad_norm": 4.485517501831055,
"learning_rate": 3.720760872171569e-06,
"loss": 0.8122,
"step": 19803
},
{
"epoch": 88.11555555555556,
"grad_norm": 4.355061054229736,
"learning_rate": 3.658352414644206e-06,
"loss": 0.8105,
"step": 19826
},
{
"epoch": 88.21777777777778,
"grad_norm": 5.2161784172058105,
"learning_rate": 3.596451903633247e-06,
"loss": 0.8115,
"step": 19849
},
{
"epoch": 88.32,
"grad_norm": 4.382901191711426,
"learning_rate": 3.535060017624453e-06,
"loss": 0.8118,
"step": 19872
},
{
"epoch": 88.42222222222222,
"grad_norm": 5.805255889892578,
"learning_rate": 3.47417742952863e-06,
"loss": 0.8046,
"step": 19895
},
{
"epoch": 88.52444444444444,
"grad_norm": 4.063962936401367,
"learning_rate": 3.4138048066741867e-06,
"loss": 0.8136,
"step": 19918
},
{
"epoch": 88.62666666666667,
"grad_norm": 5.049718379974365,
"learning_rate": 3.3539428107998814e-06,
"loss": 0.8071,
"step": 19941
},
{
"epoch": 88.72888888888889,
"grad_norm": 4.287143230438232,
"learning_rate": 3.294592098047494e-06,
"loss": 0.8064,
"step": 19964
},
{
"epoch": 88.83111111111111,
"grad_norm": 5.841145992279053,
"learning_rate": 3.2357533189547098e-06,
"loss": 0.8188,
"step": 19987
},
{
"epoch": 88.93333333333334,
"grad_norm": 6.014995098114014,
"learning_rate": 3.1774271184479675e-06,
"loss": 0.8114,
"step": 20010
},
{
"epoch": 89.03555555555556,
"grad_norm": 4.5376386642456055,
"learning_rate": 3.1196141358353357e-06,
"loss": 0.8135,
"step": 20033
},
{
"epoch": 89.13777777777777,
"grad_norm": 4.438096523284912,
"learning_rate": 3.0623150047995873e-06,
"loss": 0.8091,
"step": 20056
},
{
"epoch": 89.24,
"grad_norm": 4.940515518188477,
"learning_rate": 3.005530353391195e-06,
"loss": 0.812,
"step": 20079
},
{
"epoch": 89.34222222222222,
"grad_norm": 4.826828479766846,
"learning_rate": 2.9492608040214862e-06,
"loss": 0.8123,
"step": 20102
},
{
"epoch": 89.44444444444444,
"grad_norm": 4.983479976654053,
"learning_rate": 2.893506973455773e-06,
"loss": 0.8081,
"step": 20125
},
{
"epoch": 89.54666666666667,
"grad_norm": 6.005835056304932,
"learning_rate": 2.838269472806654e-06,
"loss": 0.8095,
"step": 20148
},
{
"epoch": 89.64888888888889,
"grad_norm": 4.9561662673950195,
"learning_rate": 2.7835489075272727e-06,
"loss": 0.8061,
"step": 20171
},
{
"epoch": 89.75111111111111,
"grad_norm": 5.078367233276367,
"learning_rate": 2.729345877404671e-06,
"loss": 0.7997,
"step": 20194
},
{
"epoch": 89.85333333333334,
"grad_norm": 4.345983505249023,
"learning_rate": 2.675660976553268e-06,
"loss": 0.8101,
"step": 20217
},
{
"epoch": 89.95555555555555,
"grad_norm": 4.390908241271973,
"learning_rate": 2.6224947934082923e-06,
"loss": 0.8016,
"step": 20240
},
{
"epoch": 90.05777777777777,
"grad_norm": 4.5562028884887695,
"learning_rate": 2.5698479107193697e-06,
"loss": 0.8039,
"step": 20263
},
{
"epoch": 90.16,
"grad_norm": 4.685390472412109,
"learning_rate": 2.517720905544102e-06,
"loss": 0.7952,
"step": 20286
},
{
"epoch": 90.26222222222222,
"grad_norm": 4.973295211791992,
"learning_rate": 2.466114349241794e-06,
"loss": 0.809,
"step": 20309
},
{
"epoch": 90.36444444444444,
"grad_norm": 5.430562496185303,
"learning_rate": 2.4150288074671346e-06,
"loss": 0.8088,
"step": 20332
},
{
"epoch": 90.46666666666667,
"grad_norm": 4.49529504776001,
"learning_rate": 2.3644648401640156e-06,
"loss": 0.8057,
"step": 20355
},
{
"epoch": 90.56888888888889,
"grad_norm": 5.173520565032959,
"learning_rate": 2.314423001559424e-06,
"loss": 0.8205,
"step": 20378
},
{
"epoch": 90.67111111111112,
"grad_norm": 5.084122657775879,
"learning_rate": 2.264903840157312e-06,
"loss": 0.8096,
"step": 20401
},
{
"epoch": 90.77333333333333,
"grad_norm": 4.675368309020996,
"learning_rate": 2.2159078987326554e-06,
"loss": 0.8109,
"step": 20424
},
{
"epoch": 90.87555555555555,
"grad_norm": 4.598373889923096,
"learning_rate": 2.167435714325411e-06,
"loss": 0.7989,
"step": 20447
},
{
"epoch": 90.97777777777777,
"grad_norm": 4.149188995361328,
"learning_rate": 2.1194878182347334e-06,
"loss": 0.8142,
"step": 20470
},
{
"epoch": 91.08,
"grad_norm": 5.164962291717529,
"learning_rate": 2.0720647360130685e-06,
"loss": 0.8096,
"step": 20493
},
{
"epoch": 91.18222222222222,
"grad_norm": 5.351869106292725,
"learning_rate": 2.0251669874604474e-06,
"loss": 0.8036,
"step": 20516
},
{
"epoch": 91.28444444444445,
"grad_norm": 5.2852935791015625,
"learning_rate": 1.9787950866187565e-06,
"loss": 0.8057,
"step": 20539
},
{
"epoch": 91.38666666666667,
"grad_norm": 6.784205436706543,
"learning_rate": 1.9329495417661046e-06,
"loss": 0.8031,
"step": 20562
},
{
"epoch": 91.4888888888889,
"grad_norm": 4.940450668334961,
"learning_rate": 1.887630855411282e-06,
"loss": 0.8066,
"step": 20585
},
{
"epoch": 91.5911111111111,
"grad_norm": 4.77994441986084,
"learning_rate": 1.84283952428822e-06,
"loss": 0.8038,
"step": 20608
},
{
"epoch": 91.69333333333333,
"grad_norm": 4.902866840362549,
"learning_rate": 1.798576039350558e-06,
"loss": 0.8043,
"step": 20631
},
{
"epoch": 91.79555555555555,
"grad_norm": 5.100454330444336,
"learning_rate": 1.7548408857662623e-06,
"loss": 0.8008,
"step": 20654
},
{
"epoch": 91.89777777777778,
"grad_norm": 4.9377264976501465,
"learning_rate": 1.7116345429123104e-06,
"loss": 0.8098,
"step": 20677
},
{
"epoch": 92.0,
"grad_norm": 5.0082292556762695,
"learning_rate": 1.6689574843694433e-06,
"loss": 0.7992,
"step": 20700
},
{
"epoch": 92.10222222222222,
"grad_norm": 4.688179016113281,
"learning_rate": 1.6268101779169375e-06,
"loss": 0.7928,
"step": 20723
},
{
"epoch": 92.20444444444445,
"grad_norm": 4.243449687957764,
"learning_rate": 1.5851930855275365e-06,
"loss": 0.7957,
"step": 20746
},
{
"epoch": 92.30666666666667,
"grad_norm": 4.956583499908447,
"learning_rate": 1.544106663362338e-06,
"loss": 0.8073,
"step": 20769
},
{
"epoch": 92.4088888888889,
"grad_norm": 4.556548118591309,
"learning_rate": 1.503551361765826e-06,
"loss": 0.8019,
"step": 20792
},
{
"epoch": 92.5111111111111,
"grad_norm": 6.762635707855225,
"learning_rate": 1.4635276252608965e-06,
"loss": 0.8084,
"step": 20815
},
{
"epoch": 92.61333333333333,
"grad_norm": 5.724966049194336,
"learning_rate": 1.4240358925440457e-06,
"loss": 0.8008,
"step": 20838
},
{
"epoch": 92.71555555555555,
"grad_norm": 5.445995330810547,
"learning_rate": 1.3850765964805e-06,
"loss": 0.802,
"step": 20861
},
{
"epoch": 92.81777777777778,
"grad_norm": 4.807301044464111,
"learning_rate": 1.3466501640994944e-06,
"loss": 0.8038,
"step": 20884
},
{
"epoch": 92.92,
"grad_norm": 5.612717151641846,
"learning_rate": 1.308757016589618e-06,
"loss": 0.7996,
"step": 20907
},
{
"epoch": 93.02222222222223,
"grad_norm": 4.5359296798706055,
"learning_rate": 1.2713975692941415e-06,
"loss": 0.801,
"step": 20930
},
{
"epoch": 93.12444444444445,
"grad_norm": 4.222482681274414,
"learning_rate": 1.2345722317065267e-06,
"loss": 0.7996,
"step": 20953
},
{
"epoch": 93.22666666666667,
"grad_norm": 4.250333786010742,
"learning_rate": 1.19828140746589e-06,
"loss": 0.8072,
"step": 20976
},
{
"epoch": 93.32888888888888,
"grad_norm": 4.197777271270752,
"learning_rate": 1.1625254943526065e-06,
"loss": 0.795,
"step": 20999
},
{
"epoch": 93.43111111111111,
"grad_norm": 5.79392671585083,
"learning_rate": 1.1273048842839307e-06,
"loss": 0.8076,
"step": 21022
},
{
"epoch": 93.53333333333333,
"grad_norm": 4.919564723968506,
"learning_rate": 1.0926199633097157e-06,
"loss": 0.802,
"step": 21045
},
{
"epoch": 93.63555555555556,
"grad_norm": 5.422025203704834,
"learning_rate": 1.0584711116081837e-06,
"loss": 0.8141,
"step": 21068
},
{
"epoch": 93.73777777777778,
"grad_norm": 4.949449062347412,
"learning_rate": 1.0248587034817237e-06,
"loss": 0.8001,
"step": 21091
},
{
"epoch": 93.84,
"grad_norm": 4.578461647033691,
"learning_rate": 9.917831073528504e-07,
"loss": 0.7959,
"step": 21114
},
{
"epoch": 93.94222222222223,
"grad_norm": 4.7736592292785645,
"learning_rate": 9.59244685760108e-07,
"loss": 0.8007,
"step": 21137
},
{
"epoch": 94.04444444444445,
"grad_norm": 4.64253044128418,
"learning_rate": 9.27243795354138e-07,
"loss": 0.8042,
"step": 21160
},
{
"epoch": 94.14666666666666,
"grad_norm": 5.671309471130371,
"learning_rate": 8.957807868937296e-07,
"loss": 0.7971,
"step": 21183
},
{
"epoch": 94.24888888888889,
"grad_norm": 4.637156963348389,
"learning_rate": 8.648560052420151e-07,
"loss": 0.8008,
"step": 21206
},
{
"epoch": 94.35111111111111,
"grad_norm": 4.140064239501953,
"learning_rate": 8.344697893626741e-07,
"loss": 0.7955,
"step": 21229
},
{
"epoch": 94.45333333333333,
"grad_norm": 4.615813732147217,
"learning_rate": 8.046224723162077e-07,
"loss": 0.7998,
"step": 21252
},
{
"epoch": 94.55555555555556,
"grad_norm": 5.006037712097168,
"learning_rate": 7.75314381256298e-07,
"loss": 0.7944,
"step": 21275
},
{
"epoch": 94.65777777777778,
"grad_norm": 4.940041542053223,
"learning_rate": 7.465458374262213e-07,
"loss": 0.7944,
"step": 21298
},
{
"epoch": 94.76,
"grad_norm": 4.452148914337158,
"learning_rate": 7.183171561553348e-07,
"loss": 0.8021,
"step": 21321
},
{
"epoch": 94.86222222222223,
"grad_norm": 4.3342509269714355,
"learning_rate": 6.906286468555955e-07,
"loss": 0.8016,
"step": 21344
},
{
"epoch": 94.96444444444444,
"grad_norm": 5.098360538482666,
"learning_rate": 6.634806130182025e-07,
"loss": 0.7997,
"step": 21367
},
{
"epoch": 95.06666666666666,
"grad_norm": 4.704761028289795,
"learning_rate": 6.368733522102432e-07,
"loss": 0.8007,
"step": 21390
},
{
"epoch": 95.16888888888889,
"grad_norm": 4.529531002044678,
"learning_rate": 6.108071560714413e-07,
"loss": 0.7976,
"step": 21413
},
{
"epoch": 95.27111111111111,
"grad_norm": 4.470498561859131,
"learning_rate": 5.852823103109639e-07,
"loss": 0.7871,
"step": 21436
},
{
"epoch": 95.37333333333333,
"grad_norm": 4.434628486633301,
"learning_rate": 5.602990947042919e-07,
"loss": 0.8027,
"step": 21459
},
{
"epoch": 95.47555555555556,
"grad_norm": 4.518807411193848,
"learning_rate": 5.358577830901435e-07,
"loss": 0.7986,
"step": 21482
},
{
"epoch": 95.57777777777778,
"grad_norm": 4.176888942718506,
"learning_rate": 5.119586433674661e-07,
"loss": 0.7951,
"step": 21505
},
{
"epoch": 95.68,
"grad_norm": 4.806949138641357,
"learning_rate": 4.886019374925333e-07,
"loss": 0.7995,
"step": 21528
},
{
"epoch": 95.78222222222222,
"grad_norm": 4.371096611022949,
"learning_rate": 4.657879214760297e-07,
"loss": 0.7991,
"step": 21551
},
{
"epoch": 95.88444444444444,
"grad_norm": 4.214781761169434,
"learning_rate": 4.435168453802874e-07,
"loss": 0.7912,
"step": 21574
},
{
"epoch": 95.98666666666666,
"grad_norm": 4.71865177154541,
"learning_rate": 4.2178895331650427e-07,
"loss": 0.804,
"step": 21597
},
{
"epoch": 96.08888888888889,
"grad_norm": 4.573912143707275,
"learning_rate": 4.0060448344209634e-07,
"loss": 0.7969,
"step": 21620
},
{
"epoch": 96.19111111111111,
"grad_norm": 5.047268390655518,
"learning_rate": 3.799636679580887e-07,
"loss": 0.7964,
"step": 21643
},
{
"epoch": 96.29333333333334,
"grad_norm": 4.307917594909668,
"learning_rate": 3.598667331065397e-07,
"loss": 0.7957,
"step": 21666
},
{
"epoch": 96.39555555555556,
"grad_norm": 4.763662815093994,
"learning_rate": 3.403138991681043e-07,
"loss": 0.7958,
"step": 21689
},
{
"epoch": 96.49777777777778,
"grad_norm": 4.808367729187012,
"learning_rate": 3.213053804595911e-07,
"loss": 0.809,
"step": 21712
},
{
"epoch": 96.6,
"grad_norm": 5.026544570922852,
"learning_rate": 3.0284138533160924e-07,
"loss": 0.8024,
"step": 21735
},
{
"epoch": 96.70222222222222,
"grad_norm": 6.12026834487915,
"learning_rate": 2.849221161663085e-07,
"loss": 0.8041,
"step": 21758
},
{
"epoch": 96.80444444444444,
"grad_norm": 4.895252227783203,
"learning_rate": 2.6754776937513717e-07,
"loss": 0.7966,
"step": 21781
},
{
"epoch": 96.90666666666667,
"grad_norm": 4.611559867858887,
"learning_rate": 2.507185353967101e-07,
"loss": 0.8041,
"step": 21804
},
{
"epoch": 97.00888888888889,
"grad_norm": 4.198352813720703,
"learning_rate": 2.344345986946994e-07,
"loss": 0.8013,
"step": 21827
},
{
"epoch": 97.11111111111111,
"grad_norm": 4.63875675201416,
"learning_rate": 2.186961377558361e-07,
"loss": 0.8015,
"step": 21850
},
{
"epoch": 97.21333333333334,
"grad_norm": 4.243088245391846,
"learning_rate": 2.0350332508793367e-07,
"loss": 0.7829,
"step": 21873
},
{
"epoch": 97.31555555555556,
"grad_norm": 4.228803634643555,
"learning_rate": 1.8885632721800106e-07,
"loss": 0.7999,
"step": 21896
},
{
"epoch": 97.41777777777777,
"grad_norm": 5.103250980377197,
"learning_rate": 1.7475530469044376e-07,
"loss": 0.7979,
"step": 21919
},
{
"epoch": 97.52,
"grad_norm": 4.691418170928955,
"learning_rate": 1.6120041206524883e-07,
"loss": 0.7972,
"step": 21942
},
{
"epoch": 97.62222222222222,
"grad_norm": 4.644149303436279,
"learning_rate": 1.481917979163583e-07,
"loss": 0.7897,
"step": 21965
},
{
"epoch": 97.72444444444444,
"grad_norm": 4.451114654541016,
"learning_rate": 1.357296048299761e-07,
"loss": 0.8001,
"step": 21988
},
{
"epoch": 97.82666666666667,
"grad_norm": 4.836966037750244,
"learning_rate": 1.2381396940305824e-07,
"loss": 0.7994,
"step": 22011
},
{
"epoch": 97.92888888888889,
"grad_norm": 4.453198432922363,
"learning_rate": 1.12445022241775e-07,
"loss": 0.7969,
"step": 22034
},
{
"epoch": 98.03111111111112,
"grad_norm": 5.4233903884887695,
"learning_rate": 1.0162288796011221e-07,
"loss": 0.8006,
"step": 22057
},
{
"epoch": 98.13333333333334,
"grad_norm": 4.528837203979492,
"learning_rate": 9.134768517848336e-08,
"loss": 0.8031,
"step": 22080
},
{
"epoch": 98.23555555555555,
"grad_norm": 5.245551586151123,
"learning_rate": 8.161952652243621e-08,
"loss": 0.8005,
"step": 22103
},
{
"epoch": 98.33777777777777,
"grad_norm": 4.625002861022949,
"learning_rate": 7.243851862141492e-08,
"loss": 0.8075,
"step": 22126
},
{
"epoch": 98.44,
"grad_norm": 4.824587345123291,
"learning_rate": 6.38047621075999e-08,
"loss": 0.7925,
"step": 22149
},
{
"epoch": 98.54222222222222,
"grad_norm": 4.704883098602295,
"learning_rate": 5.5718351614797437e-08,
"loss": 0.7953,
"step": 22172
},
{
"epoch": 98.64444444444445,
"grad_norm": 4.561920642852783,
"learning_rate": 4.817937577741294e-08,
"loss": 0.7976,
"step": 22195
},
{
"epoch": 98.74666666666667,
"grad_norm": 4.796523094177246,
"learning_rate": 4.118791722945159e-08,
"loss": 0.8026,
"step": 22218
},
{
"epoch": 98.8488888888889,
"grad_norm": 4.576013565063477,
"learning_rate": 3.474405260365798e-08,
"loss": 0.794,
"step": 22241
},
{
"epoch": 98.95111111111112,
"grad_norm": 5.13820743560791,
"learning_rate": 2.8847852530622387e-08,
"loss": 0.7895,
"step": 22264
},
{
"epoch": 99.05333333333333,
"grad_norm": 4.2987060546875,
"learning_rate": 2.3499381638064645e-08,
"loss": 0.7919,
"step": 22287
},
{
"epoch": 99.15555555555555,
"grad_norm": 4.3480305671691895,
"learning_rate": 1.8698698550068117e-08,
"loss": 0.798,
"step": 22310
},
{
"epoch": 99.25777777777778,
"grad_norm": 5.037069797515869,
"learning_rate": 1.4445855886480176e-08,
"loss": 0.8026,
"step": 22333
},
{
"epoch": 99.36,
"grad_norm": 4.374788284301758,
"learning_rate": 1.074090026231267e-08,
"loss": 0.7926,
"step": 22356
},
{
"epoch": 99.46222222222222,
"grad_norm": 4.93529748916626,
"learning_rate": 7.583872287253436e-09,
"loss": 0.8044,
"step": 22379
},
{
"epoch": 99.56444444444445,
"grad_norm": 4.404996395111084,
"learning_rate": 4.974806565177792e-09,
"loss": 0.802,
"step": 22402
},
{
"epoch": 99.66666666666667,
"grad_norm": 4.556636333465576,
"learning_rate": 2.9137316938265825e-09,
"loss": 0.793,
"step": 22425
},
{
"epoch": 99.7688888888889,
"grad_norm": 4.4638190269470215,
"learning_rate": 1.4006702644453474e-09,
"loss": 0.7999,
"step": 22448
},
{
"epoch": 99.8711111111111,
"grad_norm": 4.293120861053467,
"learning_rate": 4.3563886156228196e-10,
"loss": 0.8048,
"step": 22471
},
{
"epoch": 99.97333333333333,
"grad_norm": 4.100605010986328,
"learning_rate": 1.8648062799497822e-11,
"loss": 0.7996,
"step": 22494
},
{
"epoch": 100.0,
"step": 22500,
"total_flos": 2.1925440120390943e+18,
"train_loss": 2.6133422136730617,
"train_runtime": 133573.7106,
"train_samples_per_second": 86.157,
"train_steps_per_second": 0.168
}
],
"logging_steps": 23,
"max_steps": 22500,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.1925440120390943e+18,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}