{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.2618031500583697, "eval_steps": 500, "global_step": 21978, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005741297150402847, "grad_norm": 47.5, "learning_rate": 8.604206500956023e-07, "loss": 12.2289, "step": 10 }, { "epoch": 0.0011482594300805695, "grad_norm": 41.0, "learning_rate": 1.8164435946462718e-06, "loss": 12.1447, "step": 20 }, { "epoch": 0.0017223891451208543, "grad_norm": 38.75, "learning_rate": 2.772466539196941e-06, "loss": 12.0576, "step": 30 }, { "epoch": 0.002296518860161139, "grad_norm": 75.0, "learning_rate": 3.7284894837476104e-06, "loss": 11.9311, "step": 40 }, { "epoch": 0.0028706485752014238, "grad_norm": 38.25, "learning_rate": 4.68451242829828e-06, "loss": 11.6058, "step": 50 }, { "epoch": 0.0034447782902417086, "grad_norm": 35.0, "learning_rate": 5.6405353728489485e-06, "loss": 10.9243, "step": 60 }, { "epoch": 0.0040189080052819934, "grad_norm": 59.25, "learning_rate": 6.596558317399617e-06, "loss": 10.1567, "step": 70 }, { "epoch": 0.004593037720322278, "grad_norm": 52.0, "learning_rate": 7.552581261950287e-06, "loss": 9.6776, "step": 80 }, { "epoch": 0.005167167435362563, "grad_norm": 18.75, "learning_rate": 8.508604206500955e-06, "loss": 9.3481, "step": 90 }, { "epoch": 0.0057412971504028475, "grad_norm": 19.25, "learning_rate": 9.464627151051626e-06, "loss": 9.0556, "step": 100 }, { "epoch": 0.006315426865443133, "grad_norm": 70.0, "learning_rate": 1.0420650095602295e-05, "loss": 8.7948, "step": 110 }, { "epoch": 0.006889556580483417, "grad_norm": 54.0, "learning_rate": 1.1376673040152965e-05, "loss": 8.6711, "step": 120 }, { "epoch": 0.007463686295523702, "grad_norm": 22.25, "learning_rate": 1.2332695984703634e-05, "loss": 8.5725, "step": 130 }, { "epoch": 0.008037816010563987, "grad_norm": 5.03125, "learning_rate": 1.3288718929254305e-05, "loss": 8.3944, "step": 140 }, { "epoch": 0.008611945725604272, "grad_norm": 4.84375, "learning_rate": 1.4244741873804973e-05, "loss": 8.1984, "step": 150 }, { "epoch": 0.009186075440644556, "grad_norm": 2.640625, "learning_rate": 1.5200764818355642e-05, "loss": 8.0769, "step": 160 }, { "epoch": 0.009760205155684841, "grad_norm": 2.21875, "learning_rate": 1.615678776290631e-05, "loss": 8.0288, "step": 170 }, { "epoch": 0.010334334870725126, "grad_norm": 4.0625, "learning_rate": 1.7112810707456982e-05, "loss": 7.9832, "step": 180 }, { "epoch": 0.01090846458576541, "grad_norm": 5.71875, "learning_rate": 1.806883365200765e-05, "loss": 7.956, "step": 190 }, { "epoch": 0.011482594300805695, "grad_norm": 2.046875, "learning_rate": 1.902485659655832e-05, "loss": 7.9347, "step": 200 }, { "epoch": 0.01205672401584598, "grad_norm": 2.453125, "learning_rate": 1.9980879541108987e-05, "loss": 7.9107, "step": 210 }, { "epoch": 0.012630853730886266, "grad_norm": 2.234375, "learning_rate": 2.0936902485659657e-05, "loss": 7.8908, "step": 220 }, { "epoch": 0.01320498344592655, "grad_norm": 2.375, "learning_rate": 2.1892925430210324e-05, "loss": 7.8553, "step": 230 }, { "epoch": 0.013779113160966834, "grad_norm": 2.140625, "learning_rate": 2.2848948374760995e-05, "loss": 7.8601, "step": 240 }, { "epoch": 0.01435324287600712, "grad_norm": 2.265625, "learning_rate": 2.3804971319311666e-05, "loss": 7.8253, "step": 250 }, { "epoch": 0.014927372591047403, "grad_norm": 2.84375, "learning_rate": 2.4760994263862333e-05, "loss": 7.8335, "step": 260 }, { "epoch": 0.015501502306087689, "grad_norm": 2.15625, "learning_rate": 2.5717017208413003e-05, "loss": 7.8141, "step": 270 }, { "epoch": 0.016075632021127974, "grad_norm": 2.03125, "learning_rate": 2.6673040152963674e-05, "loss": 7.8022, "step": 280 }, { "epoch": 0.016649761736168257, "grad_norm": 4.15625, "learning_rate": 2.762906309751434e-05, "loss": 7.7881, "step": 290 }, { "epoch": 0.017223891451208544, "grad_norm": 1.96875, "learning_rate": 2.858508604206501e-05, "loss": 7.7425, "step": 300 }, { "epoch": 0.017798021166248828, "grad_norm": 1.953125, "learning_rate": 2.954110898661568e-05, "loss": 7.7538, "step": 310 }, { "epoch": 0.01837215088128911, "grad_norm": 1.90625, "learning_rate": 3.049713193116635e-05, "loss": 7.7481, "step": 320 }, { "epoch": 0.0189462805963294, "grad_norm": 1.9453125, "learning_rate": 3.145315487571702e-05, "loss": 7.7378, "step": 330 }, { "epoch": 0.019520410311369682, "grad_norm": 1.875, "learning_rate": 3.240917782026769e-05, "loss": 7.7184, "step": 340 }, { "epoch": 0.020094540026409966, "grad_norm": 2.25, "learning_rate": 3.3365200764818354e-05, "loss": 7.6801, "step": 350 }, { "epoch": 0.020668669741450253, "grad_norm": 2.171875, "learning_rate": 3.432122370936903e-05, "loss": 7.6844, "step": 360 }, { "epoch": 0.021242799456490536, "grad_norm": 1.875, "learning_rate": 3.5277246653919695e-05, "loss": 7.6768, "step": 370 }, { "epoch": 0.02181692917153082, "grad_norm": 1.8515625, "learning_rate": 3.623326959847036e-05, "loss": 7.6567, "step": 380 }, { "epoch": 0.022391058886571107, "grad_norm": 2.0625, "learning_rate": 3.7189292543021036e-05, "loss": 7.6527, "step": 390 }, { "epoch": 0.02296518860161139, "grad_norm": 2.15625, "learning_rate": 3.8145315487571704e-05, "loss": 7.6469, "step": 400 }, { "epoch": 0.023539318316651677, "grad_norm": 2.015625, "learning_rate": 3.910133843212238e-05, "loss": 7.6354, "step": 410 }, { "epoch": 0.02411344803169196, "grad_norm": 1.8828125, "learning_rate": 4.0057361376673045e-05, "loss": 7.6377, "step": 420 }, { "epoch": 0.024687577746732244, "grad_norm": 3.0, "learning_rate": 4.101338432122371e-05, "loss": 7.6348, "step": 430 }, { "epoch": 0.02526170746177253, "grad_norm": 1.9296875, "learning_rate": 4.196940726577438e-05, "loss": 7.6112, "step": 440 }, { "epoch": 0.025835837176812815, "grad_norm": 1.875, "learning_rate": 4.292543021032505e-05, "loss": 7.6331, "step": 450 }, { "epoch": 0.0264099668918531, "grad_norm": 2.0, "learning_rate": 4.388145315487572e-05, "loss": 7.5849, "step": 460 }, { "epoch": 0.026984096606893385, "grad_norm": 2.046875, "learning_rate": 4.483747609942639e-05, "loss": 7.5836, "step": 470 }, { "epoch": 0.02755822632193367, "grad_norm": 2.78125, "learning_rate": 4.5793499043977055e-05, "loss": 7.5977, "step": 480 }, { "epoch": 0.028132356036973952, "grad_norm": 2.109375, "learning_rate": 4.674952198852773e-05, "loss": 7.5915, "step": 490 }, { "epoch": 0.02870648575201424, "grad_norm": 1.9921875, "learning_rate": 4.7705544933078396e-05, "loss": 7.5873, "step": 500 }, { "epoch": 0.029280615467054523, "grad_norm": 1.90625, "learning_rate": 4.866156787762906e-05, "loss": 7.5587, "step": 510 }, { "epoch": 0.029854745182094806, "grad_norm": 2.078125, "learning_rate": 4.961759082217973e-05, "loss": 7.5544, "step": 520 }, { "epoch": 0.030428874897135093, "grad_norm": 2.140625, "learning_rate": 5.05736137667304e-05, "loss": 7.5507, "step": 530 }, { "epoch": 0.031003004612175377, "grad_norm": 1.96875, "learning_rate": 5.152963671128107e-05, "loss": 7.5432, "step": 540 }, { "epoch": 0.031577134327215664, "grad_norm": 2.0625, "learning_rate": 5.2485659655831745e-05, "loss": 7.5419, "step": 550 }, { "epoch": 0.03215126404225595, "grad_norm": 2.078125, "learning_rate": 5.344168260038241e-05, "loss": 7.5418, "step": 560 }, { "epoch": 0.03272539375729623, "grad_norm": 2.25, "learning_rate": 5.4397705544933086e-05, "loss": 7.5354, "step": 570 }, { "epoch": 0.033299523472336515, "grad_norm": 2.0, "learning_rate": 5.535372848948375e-05, "loss": 7.5175, "step": 580 }, { "epoch": 0.0338736531873768, "grad_norm": 2.25, "learning_rate": 5.630975143403442e-05, "loss": 7.5083, "step": 590 }, { "epoch": 0.03444778290241709, "grad_norm": 2.25, "learning_rate": 5.726577437858509e-05, "loss": 7.509, "step": 600 }, { "epoch": 0.03502191261745737, "grad_norm": 2.0, "learning_rate": 5.822179732313576e-05, "loss": 7.4949, "step": 610 }, { "epoch": 0.035596042332497656, "grad_norm": 2.296875, "learning_rate": 5.917782026768642e-05, "loss": 7.5187, "step": 620 }, { "epoch": 0.03617017204753794, "grad_norm": 2.015625, "learning_rate": 6.0133843212237096e-05, "loss": 7.471, "step": 630 }, { "epoch": 0.03674430176257822, "grad_norm": 2.125, "learning_rate": 6.108986615678777e-05, "loss": 7.4908, "step": 640 }, { "epoch": 0.03731843147761851, "grad_norm": 2.078125, "learning_rate": 6.204588910133844e-05, "loss": 7.4648, "step": 650 }, { "epoch": 0.0378925611926588, "grad_norm": 2.203125, "learning_rate": 6.30019120458891e-05, "loss": 7.4751, "step": 660 }, { "epoch": 0.03846669090769908, "grad_norm": 1.9453125, "learning_rate": 6.395793499043978e-05, "loss": 7.451, "step": 670 }, { "epoch": 0.039040820622739364, "grad_norm": 2.25, "learning_rate": 6.491395793499044e-05, "loss": 7.4656, "step": 680 }, { "epoch": 0.03961495033777965, "grad_norm": 2.03125, "learning_rate": 6.586998087954111e-05, "loss": 7.4733, "step": 690 }, { "epoch": 0.04018908005281993, "grad_norm": 2.34375, "learning_rate": 6.682600382409177e-05, "loss": 7.4538, "step": 700 }, { "epoch": 0.04076320976786022, "grad_norm": 2.109375, "learning_rate": 6.778202676864245e-05, "loss": 7.4687, "step": 710 }, { "epoch": 0.041337339482900505, "grad_norm": 1.9140625, "learning_rate": 6.873804971319312e-05, "loss": 7.4557, "step": 720 }, { "epoch": 0.04191146919794079, "grad_norm": 2.15625, "learning_rate": 6.96940726577438e-05, "loss": 7.4609, "step": 730 }, { "epoch": 0.04248559891298107, "grad_norm": 2.203125, "learning_rate": 7.065009560229447e-05, "loss": 7.4635, "step": 740 }, { "epoch": 0.043059728628021356, "grad_norm": 2.046875, "learning_rate": 7.160611854684513e-05, "loss": 7.4266, "step": 750 }, { "epoch": 0.04363385834306164, "grad_norm": 2.171875, "learning_rate": 7.256214149139579e-05, "loss": 7.4294, "step": 760 }, { "epoch": 0.04420798805810193, "grad_norm": 1.9921875, "learning_rate": 7.351816443594646e-05, "loss": 7.4687, "step": 770 }, { "epoch": 0.04478211777314221, "grad_norm": 2.1875, "learning_rate": 7.447418738049714e-05, "loss": 7.441, "step": 780 }, { "epoch": 0.0453562474881825, "grad_norm": 2.09375, "learning_rate": 7.54302103250478e-05, "loss": 7.429, "step": 790 }, { "epoch": 0.04593037720322278, "grad_norm": 1.9140625, "learning_rate": 7.638623326959847e-05, "loss": 7.4286, "step": 800 }, { "epoch": 0.046504506918263064, "grad_norm": 2.046875, "learning_rate": 7.734225621414915e-05, "loss": 7.433, "step": 810 }, { "epoch": 0.047078636633303354, "grad_norm": 2.1875, "learning_rate": 7.829827915869982e-05, "loss": 7.4379, "step": 820 }, { "epoch": 0.04765276634834364, "grad_norm": 2.78125, "learning_rate": 7.925430210325048e-05, "loss": 7.4205, "step": 830 }, { "epoch": 0.04822689606338392, "grad_norm": 2.1875, "learning_rate": 8.021032504780115e-05, "loss": 7.4257, "step": 840 }, { "epoch": 0.048801025778424205, "grad_norm": 2.046875, "learning_rate": 8.116634799235181e-05, "loss": 7.4371, "step": 850 }, { "epoch": 0.04937515549346449, "grad_norm": 2.0, "learning_rate": 8.212237093690249e-05, "loss": 7.4293, "step": 860 }, { "epoch": 0.04994928520850477, "grad_norm": 2.046875, "learning_rate": 8.307839388145315e-05, "loss": 7.4071, "step": 870 }, { "epoch": 0.05052341492354506, "grad_norm": 2.125, "learning_rate": 8.403441682600382e-05, "loss": 7.4027, "step": 880 }, { "epoch": 0.051097544638585346, "grad_norm": 1.984375, "learning_rate": 8.49904397705545e-05, "loss": 7.4302, "step": 890 }, { "epoch": 0.05167167435362563, "grad_norm": 2.140625, "learning_rate": 8.594646271510517e-05, "loss": 7.4265, "step": 900 }, { "epoch": 0.05224580406866591, "grad_norm": 1.890625, "learning_rate": 8.690248565965584e-05, "loss": 7.4176, "step": 910 }, { "epoch": 0.0528199337837062, "grad_norm": 2.0625, "learning_rate": 8.78585086042065e-05, "loss": 7.4276, "step": 920 }, { "epoch": 0.05339406349874648, "grad_norm": 2.109375, "learning_rate": 8.881453154875718e-05, "loss": 7.3996, "step": 930 }, { "epoch": 0.05396819321378677, "grad_norm": 2.125, "learning_rate": 8.977055449330784e-05, "loss": 7.4079, "step": 940 }, { "epoch": 0.054542322928827054, "grad_norm": 1.96875, "learning_rate": 9.072657743785851e-05, "loss": 7.3955, "step": 950 }, { "epoch": 0.05511645264386734, "grad_norm": 1.96875, "learning_rate": 9.168260038240917e-05, "loss": 7.4203, "step": 960 }, { "epoch": 0.05569058235890762, "grad_norm": 2.046875, "learning_rate": 9.263862332695985e-05, "loss": 7.39, "step": 970 }, { "epoch": 0.056264712073947905, "grad_norm": 2.046875, "learning_rate": 9.359464627151052e-05, "loss": 7.4234, "step": 980 }, { "epoch": 0.056838841788988195, "grad_norm": 2.015625, "learning_rate": 9.45506692160612e-05, "loss": 7.3976, "step": 990 }, { "epoch": 0.05741297150402848, "grad_norm": 2.046875, "learning_rate": 9.550669216061186e-05, "loss": 7.4, "step": 1000 }, { "epoch": 0.05798710121906876, "grad_norm": 2.25, "learning_rate": 9.646271510516253e-05, "loss": 7.3905, "step": 1010 }, { "epoch": 0.058561230934109046, "grad_norm": 1.9765625, "learning_rate": 9.74187380497132e-05, "loss": 7.3955, "step": 1020 }, { "epoch": 0.05913536064914933, "grad_norm": 2.0625, "learning_rate": 9.837476099426386e-05, "loss": 7.3945, "step": 1030 }, { "epoch": 0.05970949036418961, "grad_norm": 1.90625, "learning_rate": 9.933078393881452e-05, "loss": 7.3953, "step": 1040 }, { "epoch": 0.0602836200792299, "grad_norm": 1.984375, "learning_rate": 9.999999805483122e-05, "loss": 7.3903, "step": 1050 }, { "epoch": 0.06085774979427019, "grad_norm": 2.21875, "learning_rate": 9.99999634740572e-05, "loss": 7.3952, "step": 1060 }, { "epoch": 0.06143187950931047, "grad_norm": 2.25, "learning_rate": 9.999988566734478e-05, "loss": 7.3853, "step": 1070 }, { "epoch": 0.062006009224350754, "grad_norm": 1.796875, "learning_rate": 9.999976463476122e-05, "loss": 7.3878, "step": 1080 }, { "epoch": 0.06258013893939104, "grad_norm": 2.140625, "learning_rate": 9.999960037641117e-05, "loss": 7.3885, "step": 1090 }, { "epoch": 0.06315426865443133, "grad_norm": 2.03125, "learning_rate": 9.999939289243663e-05, "loss": 7.377, "step": 1100 }, { "epoch": 0.06372839836947161, "grad_norm": 1.8671875, "learning_rate": 9.999914218301699e-05, "loss": 7.3959, "step": 1110 }, { "epoch": 0.0643025280845119, "grad_norm": 1.875, "learning_rate": 9.999884824836898e-05, "loss": 7.3735, "step": 1120 }, { "epoch": 0.06487665779955218, "grad_norm": 1.8984375, "learning_rate": 9.99985110887467e-05, "loss": 7.3862, "step": 1130 }, { "epoch": 0.06545078751459246, "grad_norm": 2.28125, "learning_rate": 9.999813070444166e-05, "loss": 7.3695, "step": 1140 }, { "epoch": 0.06602491722963275, "grad_norm": 2.21875, "learning_rate": 9.999770709578267e-05, "loss": 7.3761, "step": 1150 }, { "epoch": 0.06659904694467303, "grad_norm": 1.8515625, "learning_rate": 9.999724026313598e-05, "loss": 7.355, "step": 1160 }, { "epoch": 0.06717317665971331, "grad_norm": 1.6953125, "learning_rate": 9.999673020690516e-05, "loss": 7.3493, "step": 1170 }, { "epoch": 0.0677473063747536, "grad_norm": 1.84375, "learning_rate": 9.999617692753119e-05, "loss": 7.3738, "step": 1180 }, { "epoch": 0.0683214360897939, "grad_norm": 1.8671875, "learning_rate": 9.999558042549236e-05, "loss": 7.3737, "step": 1190 }, { "epoch": 0.06889556580483418, "grad_norm": 1.7421875, "learning_rate": 9.999494070130435e-05, "loss": 7.3867, "step": 1200 }, { "epoch": 0.06946969551987446, "grad_norm": 3.265625, "learning_rate": 9.999425775552025e-05, "loss": 7.3736, "step": 1210 }, { "epoch": 0.07004382523491474, "grad_norm": 1.828125, "learning_rate": 9.999353158873045e-05, "loss": 7.3807, "step": 1220 }, { "epoch": 0.07061795494995503, "grad_norm": 1.9921875, "learning_rate": 9.999276220156276e-05, "loss": 7.3413, "step": 1230 }, { "epoch": 0.07119208466499531, "grad_norm": 2.296875, "learning_rate": 9.99919495946823e-05, "loss": 7.3401, "step": 1240 }, { "epoch": 0.0717662143800356, "grad_norm": 2.03125, "learning_rate": 9.999109376879163e-05, "loss": 7.3607, "step": 1250 }, { "epoch": 0.07234034409507588, "grad_norm": 1.875, "learning_rate": 9.999019472463057e-05, "loss": 7.353, "step": 1260 }, { "epoch": 0.07291447381011616, "grad_norm": 1.8203125, "learning_rate": 9.998925246297641e-05, "loss": 7.3633, "step": 1270 }, { "epoch": 0.07348860352515645, "grad_norm": 1.8984375, "learning_rate": 9.998826698464372e-05, "loss": 7.3313, "step": 1280 }, { "epoch": 0.07406273324019673, "grad_norm": 1.8203125, "learning_rate": 9.998723829048449e-05, "loss": 7.371, "step": 1290 }, { "epoch": 0.07463686295523703, "grad_norm": 1.828125, "learning_rate": 9.998616638138802e-05, "loss": 7.3106, "step": 1300 }, { "epoch": 0.07521099267027731, "grad_norm": 1.9375, "learning_rate": 9.998505125828104e-05, "loss": 7.3412, "step": 1310 }, { "epoch": 0.0757851223853176, "grad_norm": 1.8515625, "learning_rate": 9.998389292212755e-05, "loss": 7.3174, "step": 1320 }, { "epoch": 0.07635925210035788, "grad_norm": 1.78125, "learning_rate": 9.998269137392897e-05, "loss": 7.34, "step": 1330 }, { "epoch": 0.07693338181539816, "grad_norm": 1.8671875, "learning_rate": 9.998144661472406e-05, "loss": 7.3135, "step": 1340 }, { "epoch": 0.07750751153043844, "grad_norm": 1.78125, "learning_rate": 9.998015864558895e-05, "loss": 7.3592, "step": 1350 }, { "epoch": 0.07808164124547873, "grad_norm": 2.1875, "learning_rate": 9.99788274676371e-05, "loss": 7.3103, "step": 1360 }, { "epoch": 0.07865577096051901, "grad_norm": 2.015625, "learning_rate": 9.997745308201935e-05, "loss": 7.3127, "step": 1370 }, { "epoch": 0.0792299006755593, "grad_norm": 1.984375, "learning_rate": 9.997603548992387e-05, "loss": 7.3279, "step": 1380 }, { "epoch": 0.07980403039059958, "grad_norm": 1.6640625, "learning_rate": 9.99745746925762e-05, "loss": 7.3282, "step": 1390 }, { "epoch": 0.08037816010563986, "grad_norm": 1.8125, "learning_rate": 9.997307069123925e-05, "loss": 7.3304, "step": 1400 }, { "epoch": 0.08095228982068015, "grad_norm": 1.765625, "learning_rate": 9.997152348721324e-05, "loss": 7.3042, "step": 1410 }, { "epoch": 0.08152641953572044, "grad_norm": 1.765625, "learning_rate": 9.996993308183575e-05, "loss": 7.3112, "step": 1420 }, { "epoch": 0.08210054925076073, "grad_norm": 1.8828125, "learning_rate": 9.996829947648172e-05, "loss": 7.3347, "step": 1430 }, { "epoch": 0.08267467896580101, "grad_norm": 1.796875, "learning_rate": 9.996662267256344e-05, "loss": 7.3229, "step": 1440 }, { "epoch": 0.0832488086808413, "grad_norm": 1.734375, "learning_rate": 9.996490267153053e-05, "loss": 7.3004, "step": 1450 }, { "epoch": 0.08382293839588158, "grad_norm": 1.875, "learning_rate": 9.996313947486999e-05, "loss": 7.3087, "step": 1460 }, { "epoch": 0.08439706811092186, "grad_norm": 1.71875, "learning_rate": 9.996133308410609e-05, "loss": 7.3292, "step": 1470 }, { "epoch": 0.08497119782596214, "grad_norm": 1.8125, "learning_rate": 9.995948350080055e-05, "loss": 7.314, "step": 1480 }, { "epoch": 0.08554532754100243, "grad_norm": 2.015625, "learning_rate": 9.995759072655231e-05, "loss": 7.2709, "step": 1490 }, { "epoch": 0.08611945725604271, "grad_norm": 1.7734375, "learning_rate": 9.995565476299777e-05, "loss": 7.314, "step": 1500 }, { "epoch": 0.086693586971083, "grad_norm": 1.796875, "learning_rate": 9.995367561181057e-05, "loss": 7.3032, "step": 1510 }, { "epoch": 0.08726771668612328, "grad_norm": 1.828125, "learning_rate": 9.99516532747017e-05, "loss": 7.3275, "step": 1520 }, { "epoch": 0.08784184640116358, "grad_norm": 1.8828125, "learning_rate": 9.994958775341958e-05, "loss": 7.3007, "step": 1530 }, { "epoch": 0.08841597611620386, "grad_norm": 1.7109375, "learning_rate": 9.994747904974983e-05, "loss": 7.3164, "step": 1540 }, { "epoch": 0.08899010583124414, "grad_norm": 1.796875, "learning_rate": 9.994532716551551e-05, "loss": 7.2968, "step": 1550 }, { "epoch": 0.08956423554628443, "grad_norm": 1.7109375, "learning_rate": 9.994313210257694e-05, "loss": 7.2734, "step": 1560 }, { "epoch": 0.09013836526132471, "grad_norm": 1.765625, "learning_rate": 9.994089386283181e-05, "loss": 7.3032, "step": 1570 }, { "epoch": 0.090712494976365, "grad_norm": 1.6953125, "learning_rate": 9.99386124482151e-05, "loss": 7.295, "step": 1580 }, { "epoch": 0.09128662469140528, "grad_norm": 1.734375, "learning_rate": 9.993628786069914e-05, "loss": 7.2955, "step": 1590 }, { "epoch": 0.09186075440644556, "grad_norm": 1.765625, "learning_rate": 9.993392010229361e-05, "loss": 7.2999, "step": 1600 }, { "epoch": 0.09243488412148584, "grad_norm": 1.6953125, "learning_rate": 9.993150917504545e-05, "loss": 7.2974, "step": 1610 }, { "epoch": 0.09300901383652613, "grad_norm": 1.671875, "learning_rate": 9.992905508103897e-05, "loss": 7.3055, "step": 1620 }, { "epoch": 0.09358314355156641, "grad_norm": 1.875, "learning_rate": 9.992655782239577e-05, "loss": 7.2906, "step": 1630 }, { "epoch": 0.09415727326660671, "grad_norm": 1.8125, "learning_rate": 9.992401740127481e-05, "loss": 7.2949, "step": 1640 }, { "epoch": 0.09473140298164699, "grad_norm": 1.6953125, "learning_rate": 9.992143381987229e-05, "loss": 7.2723, "step": 1650 }, { "epoch": 0.09530553269668728, "grad_norm": 1.75, "learning_rate": 9.99188070804218e-05, "loss": 7.3072, "step": 1660 }, { "epoch": 0.09587966241172756, "grad_norm": 1.8984375, "learning_rate": 9.991613718519419e-05, "loss": 7.2643, "step": 1670 }, { "epoch": 0.09645379212676784, "grad_norm": 1.8515625, "learning_rate": 9.991342413649764e-05, "loss": 7.2772, "step": 1680 }, { "epoch": 0.09702792184180813, "grad_norm": 1.6640625, "learning_rate": 9.991066793667763e-05, "loss": 7.2988, "step": 1690 }, { "epoch": 0.09760205155684841, "grad_norm": 1.8359375, "learning_rate": 9.990786858811695e-05, "loss": 7.291, "step": 1700 }, { "epoch": 0.0981761812718887, "grad_norm": 1.671875, "learning_rate": 9.99050260932357e-05, "loss": 7.281, "step": 1710 }, { "epoch": 0.09875031098692898, "grad_norm": 1.78125, "learning_rate": 9.990214045449127e-05, "loss": 7.2702, "step": 1720 }, { "epoch": 0.09932444070196926, "grad_norm": 1.7421875, "learning_rate": 9.989921167437833e-05, "loss": 7.2667, "step": 1730 }, { "epoch": 0.09989857041700954, "grad_norm": 1.671875, "learning_rate": 9.989623975542888e-05, "loss": 7.2564, "step": 1740 }, { "epoch": 0.10047270013204983, "grad_norm": 1.796875, "learning_rate": 9.989322470021221e-05, "loss": 7.2945, "step": 1750 }, { "epoch": 0.10104682984709012, "grad_norm": 1.6953125, "learning_rate": 9.98901665113349e-05, "loss": 7.2481, "step": 1760 }, { "epoch": 0.10162095956213041, "grad_norm": 1.734375, "learning_rate": 9.98870651914408e-05, "loss": 7.2643, "step": 1770 }, { "epoch": 0.10219508927717069, "grad_norm": 1.6796875, "learning_rate": 9.988392074321105e-05, "loss": 7.2704, "step": 1780 }, { "epoch": 0.10276921899221098, "grad_norm": 1.75, "learning_rate": 9.98807331693641e-05, "loss": 7.2702, "step": 1790 }, { "epoch": 0.10334334870725126, "grad_norm": 1.734375, "learning_rate": 9.987750247265568e-05, "loss": 7.2794, "step": 1800 }, { "epoch": 0.10391747842229154, "grad_norm": 1.875, "learning_rate": 9.987422865587878e-05, "loss": 7.2656, "step": 1810 }, { "epoch": 0.10449160813733183, "grad_norm": 1.7890625, "learning_rate": 9.987091172186367e-05, "loss": 7.2393, "step": 1820 }, { "epoch": 0.10506573785237211, "grad_norm": 1.7109375, "learning_rate": 9.986755167347791e-05, "loss": 7.2547, "step": 1830 }, { "epoch": 0.1056398675674124, "grad_norm": 1.609375, "learning_rate": 9.986414851362633e-05, "loss": 7.2572, "step": 1840 }, { "epoch": 0.10621399728245268, "grad_norm": 1.6484375, "learning_rate": 9.986070224525101e-05, "loss": 7.2767, "step": 1850 }, { "epoch": 0.10678812699749296, "grad_norm": 1.8046875, "learning_rate": 9.985721287133136e-05, "loss": 7.2495, "step": 1860 }, { "epoch": 0.10736225671253326, "grad_norm": 1.671875, "learning_rate": 9.985368039488397e-05, "loss": 7.2741, "step": 1870 }, { "epoch": 0.10793638642757354, "grad_norm": 1.71875, "learning_rate": 9.985010481896274e-05, "loss": 7.267, "step": 1880 }, { "epoch": 0.10851051614261382, "grad_norm": 1.7109375, "learning_rate": 9.984648614665884e-05, "loss": 7.2617, "step": 1890 }, { "epoch": 0.10908464585765411, "grad_norm": 1.796875, "learning_rate": 9.984282438110067e-05, "loss": 7.2747, "step": 1900 }, { "epoch": 0.10965877557269439, "grad_norm": 1.7890625, "learning_rate": 9.983911952545391e-05, "loss": 7.2657, "step": 1910 }, { "epoch": 0.11023290528773468, "grad_norm": 1.6875, "learning_rate": 9.983537158292145e-05, "loss": 7.2559, "step": 1920 }, { "epoch": 0.11080703500277496, "grad_norm": 1.6640625, "learning_rate": 9.98315805567435e-05, "loss": 7.271, "step": 1930 }, { "epoch": 0.11138116471781524, "grad_norm": 1.734375, "learning_rate": 9.982774645019746e-05, "loss": 7.2564, "step": 1940 }, { "epoch": 0.11195529443285553, "grad_norm": 1.7734375, "learning_rate": 9.982386926659798e-05, "loss": 7.2654, "step": 1950 }, { "epoch": 0.11252942414789581, "grad_norm": 1.671875, "learning_rate": 9.981994900929694e-05, "loss": 7.2521, "step": 1960 }, { "epoch": 0.1131035538629361, "grad_norm": 1.75, "learning_rate": 9.981598568168354e-05, "loss": 7.2378, "step": 1970 }, { "epoch": 0.11367768357797639, "grad_norm": 1.8125, "learning_rate": 9.98119792871841e-05, "loss": 7.2558, "step": 1980 }, { "epoch": 0.11425181329301667, "grad_norm": 1.6875, "learning_rate": 9.980792982926224e-05, "loss": 7.2372, "step": 1990 }, { "epoch": 0.11482594300805696, "grad_norm": 1.625, "learning_rate": 9.98038373114188e-05, "loss": 7.2435, "step": 2000 }, { "epoch": 0.11540007272309724, "grad_norm": 1.578125, "learning_rate": 9.979970173719186e-05, "loss": 7.2298, "step": 2010 }, { "epoch": 0.11597420243813752, "grad_norm": 1.7109375, "learning_rate": 9.979552311015666e-05, "loss": 7.2634, "step": 2020 }, { "epoch": 0.11654833215317781, "grad_norm": 1.59375, "learning_rate": 9.979130143392575e-05, "loss": 7.2516, "step": 2030 }, { "epoch": 0.11712246186821809, "grad_norm": 1.78125, "learning_rate": 9.978703671214881e-05, "loss": 7.2689, "step": 2040 }, { "epoch": 0.11769659158325838, "grad_norm": 1.578125, "learning_rate": 9.97827289485128e-05, "loss": 7.2549, "step": 2050 }, { "epoch": 0.11827072129829866, "grad_norm": 1.6953125, "learning_rate": 9.977837814674186e-05, "loss": 7.2468, "step": 2060 }, { "epoch": 0.11884485101333894, "grad_norm": 1.765625, "learning_rate": 9.977398431059734e-05, "loss": 7.2569, "step": 2070 }, { "epoch": 0.11941898072837923, "grad_norm": 1.7890625, "learning_rate": 9.97695474438778e-05, "loss": 7.2475, "step": 2080 }, { "epoch": 0.11999311044341952, "grad_norm": 1.6796875, "learning_rate": 9.976506755041898e-05, "loss": 7.2458, "step": 2090 }, { "epoch": 0.1205672401584598, "grad_norm": 1.6875, "learning_rate": 9.976054463409388e-05, "loss": 7.2387, "step": 2100 }, { "epoch": 0.12114136987350009, "grad_norm": 1.65625, "learning_rate": 9.97559786988126e-05, "loss": 7.2162, "step": 2110 }, { "epoch": 0.12171549958854037, "grad_norm": 1.8828125, "learning_rate": 9.97513697485225e-05, "loss": 7.2453, "step": 2120 }, { "epoch": 0.12228962930358066, "grad_norm": 1.6015625, "learning_rate": 9.97467177872081e-05, "loss": 7.2229, "step": 2130 }, { "epoch": 0.12286375901862094, "grad_norm": 1.71875, "learning_rate": 9.974202281889114e-05, "loss": 7.2433, "step": 2140 }, { "epoch": 0.12343788873366122, "grad_norm": 1.6875, "learning_rate": 9.973728484763047e-05, "loss": 7.232, "step": 2150 }, { "epoch": 0.12401201844870151, "grad_norm": 1.7265625, "learning_rate": 9.973250387752217e-05, "loss": 7.2322, "step": 2160 }, { "epoch": 0.12458614816374179, "grad_norm": 1.6796875, "learning_rate": 9.97276799126995e-05, "loss": 7.2194, "step": 2170 }, { "epoch": 0.1251602778787821, "grad_norm": 1.6015625, "learning_rate": 9.972281295733286e-05, "loss": 7.2392, "step": 2180 }, { "epoch": 0.12573440759382237, "grad_norm": 1.765625, "learning_rate": 9.971790301562981e-05, "loss": 7.2556, "step": 2190 }, { "epoch": 0.12630853730886266, "grad_norm": 1.703125, "learning_rate": 9.971295009183512e-05, "loss": 7.2522, "step": 2200 }, { "epoch": 0.12688266702390294, "grad_norm": 1.625, "learning_rate": 9.970795419023065e-05, "loss": 7.2535, "step": 2210 }, { "epoch": 0.12745679673894322, "grad_norm": 1.765625, "learning_rate": 9.970291531513551e-05, "loss": 7.2208, "step": 2220 }, { "epoch": 0.1280309264539835, "grad_norm": 1.734375, "learning_rate": 9.969783347090585e-05, "loss": 7.2127, "step": 2230 }, { "epoch": 0.1286050561690238, "grad_norm": 1.6640625, "learning_rate": 9.969270866193506e-05, "loss": 7.2056, "step": 2240 }, { "epoch": 0.12917918588406407, "grad_norm": 1.609375, "learning_rate": 9.968754089265362e-05, "loss": 7.231, "step": 2250 }, { "epoch": 0.12975331559910436, "grad_norm": 1.6875, "learning_rate": 9.968233016752917e-05, "loss": 7.2163, "step": 2260 }, { "epoch": 0.13032744531414464, "grad_norm": 1.71875, "learning_rate": 9.967707649106648e-05, "loss": 7.2007, "step": 2270 }, { "epoch": 0.13090157502918492, "grad_norm": 1.65625, "learning_rate": 9.967177986780746e-05, "loss": 7.2608, "step": 2280 }, { "epoch": 0.1314757047442252, "grad_norm": 1.7890625, "learning_rate": 9.966644030233114e-05, "loss": 7.222, "step": 2290 }, { "epoch": 0.1320498344592655, "grad_norm": 1.7265625, "learning_rate": 9.966105779925367e-05, "loss": 7.2187, "step": 2300 }, { "epoch": 0.13262396417430578, "grad_norm": 1.6953125, "learning_rate": 9.965563236322836e-05, "loss": 7.2438, "step": 2310 }, { "epoch": 0.13319809388934606, "grad_norm": 1.7890625, "learning_rate": 9.965016399894556e-05, "loss": 7.1962, "step": 2320 }, { "epoch": 0.13377222360438634, "grad_norm": 1.671875, "learning_rate": 9.964465271113282e-05, "loss": 7.222, "step": 2330 }, { "epoch": 0.13434635331942663, "grad_norm": 1.6484375, "learning_rate": 9.963909850455473e-05, "loss": 7.1955, "step": 2340 }, { "epoch": 0.1349204830344669, "grad_norm": 1.671875, "learning_rate": 9.963350138401299e-05, "loss": 7.2496, "step": 2350 }, { "epoch": 0.1354946127495072, "grad_norm": 1.7578125, "learning_rate": 9.962786135434648e-05, "loss": 7.2438, "step": 2360 }, { "epoch": 0.13606874246454748, "grad_norm": 1.703125, "learning_rate": 9.962217842043106e-05, "loss": 7.2093, "step": 2370 }, { "epoch": 0.1366428721795878, "grad_norm": 1.6953125, "learning_rate": 9.961645258717976e-05, "loss": 7.1862, "step": 2380 }, { "epoch": 0.13721700189462807, "grad_norm": 1.7265625, "learning_rate": 9.961068385954265e-05, "loss": 7.2067, "step": 2390 }, { "epoch": 0.13779113160966835, "grad_norm": 1.640625, "learning_rate": 9.960487224250694e-05, "loss": 7.2353, "step": 2400 }, { "epoch": 0.13836526132470864, "grad_norm": 1.6328125, "learning_rate": 9.959901774109687e-05, "loss": 7.1877, "step": 2410 }, { "epoch": 0.13893939103974892, "grad_norm": 1.6484375, "learning_rate": 9.959312036037379e-05, "loss": 7.2055, "step": 2420 }, { "epoch": 0.1395135207547892, "grad_norm": 1.7109375, "learning_rate": 9.958718010543607e-05, "loss": 7.2765, "step": 2430 }, { "epoch": 0.1400876504698295, "grad_norm": 1.6875, "learning_rate": 9.958119698141917e-05, "loss": 7.2015, "step": 2440 }, { "epoch": 0.14066178018486977, "grad_norm": 1.6640625, "learning_rate": 9.957517099349564e-05, "loss": 7.2288, "step": 2450 }, { "epoch": 0.14123590989991006, "grad_norm": 1.65625, "learning_rate": 9.956910214687507e-05, "loss": 7.251, "step": 2460 }, { "epoch": 0.14181003961495034, "grad_norm": 1.6484375, "learning_rate": 9.956299044680409e-05, "loss": 7.2318, "step": 2470 }, { "epoch": 0.14238416932999062, "grad_norm": 1.890625, "learning_rate": 9.955683589856634e-05, "loss": 7.2268, "step": 2480 }, { "epoch": 0.1429582990450309, "grad_norm": 1.5859375, "learning_rate": 9.955063850748263e-05, "loss": 7.2451, "step": 2490 }, { "epoch": 0.1435324287600712, "grad_norm": 1.671875, "learning_rate": 9.954439827891065e-05, "loss": 7.2232, "step": 2500 }, { "epoch": 0.14410655847511147, "grad_norm": 1.6875, "learning_rate": 9.953811521824522e-05, "loss": 7.2054, "step": 2510 }, { "epoch": 0.14468068819015176, "grad_norm": 1.609375, "learning_rate": 9.953178933091818e-05, "loss": 7.2127, "step": 2520 }, { "epoch": 0.14525481790519204, "grad_norm": 1.6875, "learning_rate": 9.952542062239838e-05, "loss": 7.2002, "step": 2530 }, { "epoch": 0.14582894762023232, "grad_norm": 1.71875, "learning_rate": 9.951900909819169e-05, "loss": 7.235, "step": 2540 }, { "epoch": 0.1464030773352726, "grad_norm": 1.7265625, "learning_rate": 9.9512554763841e-05, "loss": 7.2237, "step": 2550 }, { "epoch": 0.1469772070503129, "grad_norm": 1.6953125, "learning_rate": 9.95060576249262e-05, "loss": 7.2328, "step": 2560 }, { "epoch": 0.14755133676535317, "grad_norm": 1.6875, "learning_rate": 9.949951768706419e-05, "loss": 7.2167, "step": 2570 }, { "epoch": 0.14812546648039346, "grad_norm": 1.65625, "learning_rate": 9.949293495590888e-05, "loss": 7.1892, "step": 2580 }, { "epoch": 0.14869959619543374, "grad_norm": 1.6015625, "learning_rate": 9.948630943715118e-05, "loss": 7.1999, "step": 2590 }, { "epoch": 0.14927372591047405, "grad_norm": 1.65625, "learning_rate": 9.947964113651896e-05, "loss": 7.2392, "step": 2600 }, { "epoch": 0.14984785562551434, "grad_norm": 1.6640625, "learning_rate": 9.947293005977709e-05, "loss": 7.1989, "step": 2610 }, { "epoch": 0.15042198534055462, "grad_norm": 1.71875, "learning_rate": 9.946617621272745e-05, "loss": 7.1833, "step": 2620 }, { "epoch": 0.1509961150555949, "grad_norm": 1.6953125, "learning_rate": 9.945937960120886e-05, "loss": 7.2117, "step": 2630 }, { "epoch": 0.1515702447706352, "grad_norm": 1.6953125, "learning_rate": 9.945254023109712e-05, "loss": 7.1973, "step": 2640 }, { "epoch": 0.15214437448567547, "grad_norm": 1.6171875, "learning_rate": 9.9445658108305e-05, "loss": 7.1816, "step": 2650 }, { "epoch": 0.15271850420071575, "grad_norm": 1.640625, "learning_rate": 9.943873323878221e-05, "loss": 7.1913, "step": 2660 }, { "epoch": 0.15329263391575604, "grad_norm": 1.625, "learning_rate": 9.943176562851548e-05, "loss": 7.2218, "step": 2670 }, { "epoch": 0.15386676363079632, "grad_norm": 1.75, "learning_rate": 9.942475528352842e-05, "loss": 7.2328, "step": 2680 }, { "epoch": 0.1544408933458366, "grad_norm": 1.6796875, "learning_rate": 9.941770220988158e-05, "loss": 7.1863, "step": 2690 }, { "epoch": 0.1550150230608769, "grad_norm": 1.6015625, "learning_rate": 9.941060641367253e-05, "loss": 7.2014, "step": 2700 }, { "epoch": 0.15558915277591717, "grad_norm": 1.703125, "learning_rate": 9.940346790103569e-05, "loss": 7.2158, "step": 2710 }, { "epoch": 0.15616328249095746, "grad_norm": 1.7890625, "learning_rate": 9.939628667814244e-05, "loss": 7.1862, "step": 2720 }, { "epoch": 0.15673741220599774, "grad_norm": 1.6796875, "learning_rate": 9.938906275120112e-05, "loss": 7.169, "step": 2730 }, { "epoch": 0.15731154192103802, "grad_norm": 1.671875, "learning_rate": 9.938179612645693e-05, "loss": 7.2047, "step": 2740 }, { "epoch": 0.1578856716360783, "grad_norm": 1.6484375, "learning_rate": 9.9374486810192e-05, "loss": 7.2038, "step": 2750 }, { "epoch": 0.1584598013511186, "grad_norm": 1.65625, "learning_rate": 9.93671348087254e-05, "loss": 7.2069, "step": 2760 }, { "epoch": 0.15903393106615887, "grad_norm": 1.671875, "learning_rate": 9.935974012841305e-05, "loss": 7.2183, "step": 2770 }, { "epoch": 0.15960806078119916, "grad_norm": 1.5859375, "learning_rate": 9.935230277564782e-05, "loss": 7.2102, "step": 2780 }, { "epoch": 0.16018219049623944, "grad_norm": 1.8515625, "learning_rate": 9.934482275685943e-05, "loss": 7.2052, "step": 2790 }, { "epoch": 0.16075632021127972, "grad_norm": 1.6796875, "learning_rate": 9.933730007851451e-05, "loss": 7.1924, "step": 2800 }, { "epoch": 0.16133044992632, "grad_norm": 1.6484375, "learning_rate": 9.932973474711655e-05, "loss": 7.2124, "step": 2810 }, { "epoch": 0.1619045796413603, "grad_norm": 1.625, "learning_rate": 9.932212676920595e-05, "loss": 7.2017, "step": 2820 }, { "epoch": 0.1624787093564006, "grad_norm": 1.5859375, "learning_rate": 9.931447615135994e-05, "loss": 7.2266, "step": 2830 }, { "epoch": 0.16305283907144089, "grad_norm": 1.5703125, "learning_rate": 9.93067829001926e-05, "loss": 7.1716, "step": 2840 }, { "epoch": 0.16362696878648117, "grad_norm": 1.703125, "learning_rate": 9.929904702235495e-05, "loss": 7.1671, "step": 2850 }, { "epoch": 0.16420109850152145, "grad_norm": 1.671875, "learning_rate": 9.929126852453477e-05, "loss": 7.1841, "step": 2860 }, { "epoch": 0.16477522821656174, "grad_norm": 1.6953125, "learning_rate": 9.928344741345672e-05, "loss": 7.2104, "step": 2870 }, { "epoch": 0.16534935793160202, "grad_norm": 1.578125, "learning_rate": 9.92755836958823e-05, "loss": 7.1866, "step": 2880 }, { "epoch": 0.1659234876466423, "grad_norm": 1.7109375, "learning_rate": 9.926767737860988e-05, "loss": 7.1634, "step": 2890 }, { "epoch": 0.1664976173616826, "grad_norm": 1.6484375, "learning_rate": 9.92597284684746e-05, "loss": 7.2023, "step": 2900 }, { "epoch": 0.16707174707672287, "grad_norm": 1.7578125, "learning_rate": 9.925173697234844e-05, "loss": 7.1846, "step": 2910 }, { "epoch": 0.16764587679176315, "grad_norm": 1.6953125, "learning_rate": 9.924370289714022e-05, "loss": 7.1909, "step": 2920 }, { "epoch": 0.16822000650680344, "grad_norm": 1.5859375, "learning_rate": 9.923562624979555e-05, "loss": 7.1852, "step": 2930 }, { "epoch": 0.16879413622184372, "grad_norm": 1.6484375, "learning_rate": 9.922750703729684e-05, "loss": 7.1982, "step": 2940 }, { "epoch": 0.169368265936884, "grad_norm": 1.65625, "learning_rate": 9.921934526666332e-05, "loss": 7.1717, "step": 2950 }, { "epoch": 0.1699423956519243, "grad_norm": 1.6796875, "learning_rate": 9.921114094495099e-05, "loss": 7.1865, "step": 2960 }, { "epoch": 0.17051652536696457, "grad_norm": 1.6328125, "learning_rate": 9.920289407925263e-05, "loss": 7.194, "step": 2970 }, { "epoch": 0.17109065508200486, "grad_norm": 1.65625, "learning_rate": 9.919460467669785e-05, "loss": 7.1622, "step": 2980 }, { "epoch": 0.17166478479704514, "grad_norm": 1.6953125, "learning_rate": 9.918627274445297e-05, "loss": 7.1727, "step": 2990 }, { "epoch": 0.17223891451208542, "grad_norm": 1.6328125, "learning_rate": 9.917789828972113e-05, "loss": 7.2052, "step": 3000 }, { "epoch": 0.1728130442271257, "grad_norm": 1.6015625, "learning_rate": 9.916948131974217e-05, "loss": 7.2079, "step": 3010 }, { "epoch": 0.173387173942166, "grad_norm": 1.8203125, "learning_rate": 9.916102184179279e-05, "loss": 7.1754, "step": 3020 }, { "epoch": 0.17396130365720627, "grad_norm": 1.703125, "learning_rate": 9.91525198631863e-05, "loss": 7.1914, "step": 3030 }, { "epoch": 0.17453543337224656, "grad_norm": 1.6875, "learning_rate": 9.914397539127289e-05, "loss": 7.1679, "step": 3040 }, { "epoch": 0.17510956308728687, "grad_norm": 1.5859375, "learning_rate": 9.913538843343936e-05, "loss": 7.181, "step": 3050 }, { "epoch": 0.17568369280232715, "grad_norm": 1.53125, "learning_rate": 9.912675899710934e-05, "loss": 7.1929, "step": 3060 }, { "epoch": 0.17625782251736744, "grad_norm": 1.65625, "learning_rate": 9.911808708974315e-05, "loss": 7.174, "step": 3070 }, { "epoch": 0.17683195223240772, "grad_norm": 1.640625, "learning_rate": 9.91093727188378e-05, "loss": 7.1991, "step": 3080 }, { "epoch": 0.177406081947448, "grad_norm": 1.640625, "learning_rate": 9.910061589192705e-05, "loss": 7.1869, "step": 3090 }, { "epoch": 0.17798021166248829, "grad_norm": 1.7421875, "learning_rate": 9.909181661658134e-05, "loss": 7.1955, "step": 3100 }, { "epoch": 0.17855434137752857, "grad_norm": 1.65625, "learning_rate": 9.908297490040778e-05, "loss": 7.1625, "step": 3110 }, { "epoch": 0.17912847109256885, "grad_norm": 1.78125, "learning_rate": 9.907409075105028e-05, "loss": 7.201, "step": 3120 }, { "epoch": 0.17970260080760914, "grad_norm": 1.6796875, "learning_rate": 9.90651641761893e-05, "loss": 7.167, "step": 3130 }, { "epoch": 0.18027673052264942, "grad_norm": 1.671875, "learning_rate": 9.905619518354205e-05, "loss": 7.1732, "step": 3140 }, { "epoch": 0.1808508602376897, "grad_norm": 1.640625, "learning_rate": 9.904718378086242e-05, "loss": 7.175, "step": 3150 }, { "epoch": 0.18142498995273, "grad_norm": 1.7109375, "learning_rate": 9.903812997594092e-05, "loss": 7.169, "step": 3160 }, { "epoch": 0.18199911966777027, "grad_norm": 1.6484375, "learning_rate": 9.902903377660473e-05, "loss": 7.1731, "step": 3170 }, { "epoch": 0.18257324938281055, "grad_norm": 1.921875, "learning_rate": 9.901989519071773e-05, "loss": 7.1691, "step": 3180 }, { "epoch": 0.18314737909785084, "grad_norm": 1.6484375, "learning_rate": 9.901071422618036e-05, "loss": 7.1727, "step": 3190 }, { "epoch": 0.18372150881289112, "grad_norm": 1.59375, "learning_rate": 9.900149089092978e-05, "loss": 7.1871, "step": 3200 }, { "epoch": 0.1842956385279314, "grad_norm": 1.6875, "learning_rate": 9.899222519293971e-05, "loss": 7.1621, "step": 3210 }, { "epoch": 0.1848697682429717, "grad_norm": 1.6953125, "learning_rate": 9.898291714022055e-05, "loss": 7.1787, "step": 3220 }, { "epoch": 0.18544389795801197, "grad_norm": 1.65625, "learning_rate": 9.897356674081928e-05, "loss": 7.1843, "step": 3230 }, { "epoch": 0.18601802767305226, "grad_norm": 1.671875, "learning_rate": 9.89641740028195e-05, "loss": 7.1441, "step": 3240 }, { "epoch": 0.18659215738809254, "grad_norm": 1.6171875, "learning_rate": 9.895473893434142e-05, "loss": 7.1718, "step": 3250 }, { "epoch": 0.18716628710313282, "grad_norm": 1.75, "learning_rate": 9.894526154354185e-05, "loss": 7.177, "step": 3260 }, { "epoch": 0.1877404168181731, "grad_norm": 1.7109375, "learning_rate": 9.893574183861417e-05, "loss": 7.1814, "step": 3270 }, { "epoch": 0.18831454653321342, "grad_norm": 1.734375, "learning_rate": 9.892617982778833e-05, "loss": 7.166, "step": 3280 }, { "epoch": 0.1888886762482537, "grad_norm": 1.7734375, "learning_rate": 9.89165755193309e-05, "loss": 7.1755, "step": 3290 }, { "epoch": 0.18946280596329398, "grad_norm": 1.65625, "learning_rate": 9.890692892154498e-05, "loss": 7.1605, "step": 3300 }, { "epoch": 0.19003693567833427, "grad_norm": 1.6640625, "learning_rate": 9.889724004277023e-05, "loss": 7.1718, "step": 3310 }, { "epoch": 0.19061106539337455, "grad_norm": 1.546875, "learning_rate": 9.88875088913829e-05, "loss": 7.174, "step": 3320 }, { "epoch": 0.19118519510841483, "grad_norm": 1.78125, "learning_rate": 9.887773547579574e-05, "loss": 7.1784, "step": 3330 }, { "epoch": 0.19175932482345512, "grad_norm": 1.7109375, "learning_rate": 9.886791980445806e-05, "loss": 7.1844, "step": 3340 }, { "epoch": 0.1923334545384954, "grad_norm": 1.6796875, "learning_rate": 9.885806188585571e-05, "loss": 7.1523, "step": 3350 }, { "epoch": 0.19290758425353569, "grad_norm": 1.734375, "learning_rate": 9.884816172851104e-05, "loss": 7.1403, "step": 3360 }, { "epoch": 0.19348171396857597, "grad_norm": 1.59375, "learning_rate": 9.883821934098292e-05, "loss": 7.1545, "step": 3370 }, { "epoch": 0.19405584368361625, "grad_norm": 1.578125, "learning_rate": 9.882823473186675e-05, "loss": 7.1925, "step": 3380 }, { "epoch": 0.19462997339865654, "grad_norm": 1.6328125, "learning_rate": 9.881820790979443e-05, "loss": 7.1765, "step": 3390 }, { "epoch": 0.19520410311369682, "grad_norm": 1.65625, "learning_rate": 9.880813888343431e-05, "loss": 7.1598, "step": 3400 }, { "epoch": 0.1957782328287371, "grad_norm": 1.609375, "learning_rate": 9.87980276614913e-05, "loss": 7.1651, "step": 3410 }, { "epoch": 0.1963523625437774, "grad_norm": 1.609375, "learning_rate": 9.87878742527067e-05, "loss": 7.1966, "step": 3420 }, { "epoch": 0.19692649225881767, "grad_norm": 1.7109375, "learning_rate": 9.877767866585837e-05, "loss": 7.1851, "step": 3430 }, { "epoch": 0.19750062197385795, "grad_norm": 1.6796875, "learning_rate": 9.876744090976056e-05, "loss": 7.1835, "step": 3440 }, { "epoch": 0.19807475168889824, "grad_norm": 1.6953125, "learning_rate": 9.875716099326404e-05, "loss": 7.2052, "step": 3450 }, { "epoch": 0.19864888140393852, "grad_norm": 1.609375, "learning_rate": 9.874683892525598e-05, "loss": 7.1426, "step": 3460 }, { "epoch": 0.1992230111189788, "grad_norm": 1.71875, "learning_rate": 9.873647471466e-05, "loss": 7.1271, "step": 3470 }, { "epoch": 0.1997971408340191, "grad_norm": 1.5859375, "learning_rate": 9.872606837043617e-05, "loss": 7.1351, "step": 3480 }, { "epoch": 0.20037127054905937, "grad_norm": 1.671875, "learning_rate": 9.871561990158097e-05, "loss": 7.1292, "step": 3490 }, { "epoch": 0.20094540026409966, "grad_norm": 1.6328125, "learning_rate": 9.870512931712734e-05, "loss": 7.1518, "step": 3500 }, { "epoch": 0.20151952997913997, "grad_norm": 1.703125, "learning_rate": 9.869459662614455e-05, "loss": 7.1549, "step": 3510 }, { "epoch": 0.20209365969418025, "grad_norm": 1.6328125, "learning_rate": 9.868402183773833e-05, "loss": 7.1718, "step": 3520 }, { "epoch": 0.20266778940922053, "grad_norm": 1.6328125, "learning_rate": 9.867340496105079e-05, "loss": 7.1645, "step": 3530 }, { "epoch": 0.20324191912426082, "grad_norm": 1.625, "learning_rate": 9.866274600526043e-05, "loss": 7.1599, "step": 3540 }, { "epoch": 0.2038160488393011, "grad_norm": 1.75, "learning_rate": 9.865204497958211e-05, "loss": 7.1633, "step": 3550 }, { "epoch": 0.20439017855434138, "grad_norm": 1.65625, "learning_rate": 9.864130189326709e-05, "loss": 7.1397, "step": 3560 }, { "epoch": 0.20496430826938167, "grad_norm": 1.640625, "learning_rate": 9.863051675560297e-05, "loss": 7.1739, "step": 3570 }, { "epoch": 0.20553843798442195, "grad_norm": 1.8515625, "learning_rate": 9.861968957591372e-05, "loss": 7.157, "step": 3580 }, { "epoch": 0.20611256769946223, "grad_norm": 1.6484375, "learning_rate": 9.860882036355962e-05, "loss": 7.1219, "step": 3590 }, { "epoch": 0.20668669741450252, "grad_norm": 1.6953125, "learning_rate": 9.859790912793737e-05, "loss": 7.1577, "step": 3600 }, { "epoch": 0.2072608271295428, "grad_norm": 1.6640625, "learning_rate": 9.858695587847987e-05, "loss": 7.1883, "step": 3610 }, { "epoch": 0.20783495684458309, "grad_norm": 1.609375, "learning_rate": 9.857596062465648e-05, "loss": 7.1774, "step": 3620 }, { "epoch": 0.20840908655962337, "grad_norm": 1.625, "learning_rate": 9.856492337597276e-05, "loss": 7.1515, "step": 3630 }, { "epoch": 0.20898321627466365, "grad_norm": 1.71875, "learning_rate": 9.855384414197067e-05, "loss": 7.1831, "step": 3640 }, { "epoch": 0.20955734598970394, "grad_norm": 1.6796875, "learning_rate": 9.854272293222841e-05, "loss": 7.1699, "step": 3650 }, { "epoch": 0.21013147570474422, "grad_norm": 1.6640625, "learning_rate": 9.853155975636045e-05, "loss": 7.1675, "step": 3660 }, { "epoch": 0.2107056054197845, "grad_norm": 1.6875, "learning_rate": 9.852035462401764e-05, "loss": 7.1572, "step": 3670 }, { "epoch": 0.2112797351348248, "grad_norm": 1.5546875, "learning_rate": 9.850910754488697e-05, "loss": 7.1519, "step": 3680 }, { "epoch": 0.21185386484986507, "grad_norm": 1.6953125, "learning_rate": 9.849781852869176e-05, "loss": 7.1442, "step": 3690 }, { "epoch": 0.21242799456490535, "grad_norm": 1.609375, "learning_rate": 9.848648758519161e-05, "loss": 7.1557, "step": 3700 }, { "epoch": 0.21300212427994564, "grad_norm": 1.6171875, "learning_rate": 9.847511472418235e-05, "loss": 7.1443, "step": 3710 }, { "epoch": 0.21357625399498592, "grad_norm": 1.6796875, "learning_rate": 9.846369995549601e-05, "loss": 7.1367, "step": 3720 }, { "epoch": 0.21415038371002623, "grad_norm": 1.703125, "learning_rate": 9.84522432890009e-05, "loss": 7.1849, "step": 3730 }, { "epoch": 0.21472451342506652, "grad_norm": 1.6484375, "learning_rate": 9.844074473460152e-05, "loss": 7.1564, "step": 3740 }, { "epoch": 0.2152986431401068, "grad_norm": 1.609375, "learning_rate": 9.842920430223858e-05, "loss": 7.1428, "step": 3750 }, { "epoch": 0.21587277285514708, "grad_norm": 1.7265625, "learning_rate": 9.841762200188904e-05, "loss": 7.1381, "step": 3760 }, { "epoch": 0.21644690257018737, "grad_norm": 1.609375, "learning_rate": 9.840599784356601e-05, "loss": 7.1563, "step": 3770 }, { "epoch": 0.21702103228522765, "grad_norm": 1.609375, "learning_rate": 9.839433183731879e-05, "loss": 7.1821, "step": 3780 }, { "epoch": 0.21759516200026793, "grad_norm": 1.703125, "learning_rate": 9.838262399323288e-05, "loss": 7.1049, "step": 3790 }, { "epoch": 0.21816929171530822, "grad_norm": 1.625, "learning_rate": 9.837087432142993e-05, "loss": 7.1677, "step": 3800 }, { "epoch": 0.2187434214303485, "grad_norm": 1.5546875, "learning_rate": 9.835908283206777e-05, "loss": 7.1602, "step": 3810 }, { "epoch": 0.21931755114538878, "grad_norm": 1.6640625, "learning_rate": 9.834724953534036e-05, "loss": 7.158, "step": 3820 }, { "epoch": 0.21989168086042907, "grad_norm": 1.75, "learning_rate": 9.833537444147781e-05, "loss": 7.1471, "step": 3830 }, { "epoch": 0.22046581057546935, "grad_norm": 1.703125, "learning_rate": 9.832345756074639e-05, "loss": 7.1652, "step": 3840 }, { "epoch": 0.22103994029050963, "grad_norm": 1.5625, "learning_rate": 9.831149890344846e-05, "loss": 7.1406, "step": 3850 }, { "epoch": 0.22161407000554992, "grad_norm": 1.625, "learning_rate": 9.82994984799225e-05, "loss": 7.1428, "step": 3860 }, { "epoch": 0.2221881997205902, "grad_norm": 1.6328125, "learning_rate": 9.828745630054314e-05, "loss": 7.1159, "step": 3870 }, { "epoch": 0.22276232943563049, "grad_norm": 1.671875, "learning_rate": 9.827537237572107e-05, "loss": 7.1448, "step": 3880 }, { "epoch": 0.22333645915067077, "grad_norm": 1.7109375, "learning_rate": 9.826324671590303e-05, "loss": 7.132, "step": 3890 }, { "epoch": 0.22391058886571105, "grad_norm": 1.6328125, "learning_rate": 9.825107933157196e-05, "loss": 7.1557, "step": 3900 }, { "epoch": 0.22448471858075134, "grad_norm": 1.71875, "learning_rate": 9.823887023324675e-05, "loss": 7.1258, "step": 3910 }, { "epoch": 0.22505884829579162, "grad_norm": 1.6328125, "learning_rate": 9.822661943148243e-05, "loss": 7.1438, "step": 3920 }, { "epoch": 0.2256329780108319, "grad_norm": 1.59375, "learning_rate": 9.821432693687004e-05, "loss": 7.1421, "step": 3930 }, { "epoch": 0.2262071077258722, "grad_norm": 1.640625, "learning_rate": 9.820199276003667e-05, "loss": 7.1424, "step": 3940 }, { "epoch": 0.22678123744091247, "grad_norm": 1.59375, "learning_rate": 9.818961691164548e-05, "loss": 7.1514, "step": 3950 }, { "epoch": 0.22735536715595278, "grad_norm": 1.609375, "learning_rate": 9.817719940239563e-05, "loss": 7.128, "step": 3960 }, { "epoch": 0.22792949687099306, "grad_norm": 1.703125, "learning_rate": 9.816474024302228e-05, "loss": 7.1122, "step": 3970 }, { "epoch": 0.22850362658603335, "grad_norm": 1.78125, "learning_rate": 9.815223944429662e-05, "loss": 7.1329, "step": 3980 }, { "epoch": 0.22907775630107363, "grad_norm": 1.6015625, "learning_rate": 9.813969701702583e-05, "loss": 7.1356, "step": 3990 }, { "epoch": 0.22965188601611392, "grad_norm": 1.6953125, "learning_rate": 9.81271129720531e-05, "loss": 7.1245, "step": 4000 }, { "epoch": 0.2302260157311542, "grad_norm": 1.6328125, "learning_rate": 9.811448732025757e-05, "loss": 7.1262, "step": 4010 }, { "epoch": 0.23080014544619448, "grad_norm": 1.578125, "learning_rate": 9.810182007255435e-05, "loss": 7.1314, "step": 4020 }, { "epoch": 0.23137427516123477, "grad_norm": 1.7109375, "learning_rate": 9.808911123989452e-05, "loss": 7.134, "step": 4030 }, { "epoch": 0.23194840487627505, "grad_norm": 1.640625, "learning_rate": 9.807636083326515e-05, "loss": 7.1211, "step": 4040 }, { "epoch": 0.23252253459131533, "grad_norm": 1.625, "learning_rate": 9.806356886368917e-05, "loss": 7.1423, "step": 4050 }, { "epoch": 0.23309666430635562, "grad_norm": 1.640625, "learning_rate": 9.80507353422255e-05, "loss": 7.1196, "step": 4060 }, { "epoch": 0.2336707940213959, "grad_norm": 1.6171875, "learning_rate": 9.803786027996899e-05, "loss": 7.1225, "step": 4070 }, { "epoch": 0.23424492373643618, "grad_norm": 1.6875, "learning_rate": 9.802494368805035e-05, "loss": 7.1493, "step": 4080 }, { "epoch": 0.23481905345147647, "grad_norm": 1.765625, "learning_rate": 9.801198557763623e-05, "loss": 7.1441, "step": 4090 }, { "epoch": 0.23539318316651675, "grad_norm": 1.671875, "learning_rate": 9.799898595992919e-05, "loss": 7.1074, "step": 4100 }, { "epoch": 0.23596731288155703, "grad_norm": 1.7109375, "learning_rate": 9.798594484616762e-05, "loss": 7.1399, "step": 4110 }, { "epoch": 0.23654144259659732, "grad_norm": 1.671875, "learning_rate": 9.797286224762584e-05, "loss": 7.1218, "step": 4120 }, { "epoch": 0.2371155723116376, "grad_norm": 1.59375, "learning_rate": 9.7959738175614e-05, "loss": 7.1224, "step": 4130 }, { "epoch": 0.23768970202667788, "grad_norm": 1.640625, "learning_rate": 9.794657264147811e-05, "loss": 7.1262, "step": 4140 }, { "epoch": 0.23826383174171817, "grad_norm": 1.703125, "learning_rate": 9.793336565660005e-05, "loss": 7.1277, "step": 4150 }, { "epoch": 0.23883796145675845, "grad_norm": 1.6015625, "learning_rate": 9.792011723239751e-05, "loss": 7.1333, "step": 4160 }, { "epoch": 0.23941209117179874, "grad_norm": 1.6875, "learning_rate": 9.790682738032397e-05, "loss": 7.1199, "step": 4170 }, { "epoch": 0.23998622088683905, "grad_norm": 1.609375, "learning_rate": 9.789349611186882e-05, "loss": 7.1562, "step": 4180 }, { "epoch": 0.24056035060187933, "grad_norm": 1.7265625, "learning_rate": 9.788012343855716e-05, "loss": 7.1346, "step": 4190 }, { "epoch": 0.2411344803169196, "grad_norm": 1.671875, "learning_rate": 9.786670937194996e-05, "loss": 7.1284, "step": 4200 }, { "epoch": 0.2417086100319599, "grad_norm": 1.609375, "learning_rate": 9.785325392364391e-05, "loss": 7.1572, "step": 4210 }, { "epoch": 0.24228273974700018, "grad_norm": 1.6171875, "learning_rate": 9.783975710527154e-05, "loss": 7.1039, "step": 4220 }, { "epoch": 0.24285686946204046, "grad_norm": 1.6875, "learning_rate": 9.782621892850106e-05, "loss": 7.108, "step": 4230 }, { "epoch": 0.24343099917708075, "grad_norm": 1.625, "learning_rate": 9.781263940503653e-05, "loss": 7.1546, "step": 4240 }, { "epoch": 0.24400512889212103, "grad_norm": 1.6328125, "learning_rate": 9.77990185466177e-05, "loss": 7.1348, "step": 4250 }, { "epoch": 0.24457925860716132, "grad_norm": 1.7578125, "learning_rate": 9.778535636502005e-05, "loss": 7.1317, "step": 4260 }, { "epoch": 0.2451533883222016, "grad_norm": 1.640625, "learning_rate": 9.777165287205484e-05, "loss": 7.1193, "step": 4270 }, { "epoch": 0.24572751803724188, "grad_norm": 1.5859375, "learning_rate": 9.775790807956894e-05, "loss": 7.1458, "step": 4280 }, { "epoch": 0.24630164775228217, "grad_norm": 1.5390625, "learning_rate": 9.774412199944507e-05, "loss": 7.0719, "step": 4290 }, { "epoch": 0.24687577746732245, "grad_norm": 1.6328125, "learning_rate": 9.773029464360151e-05, "loss": 7.1046, "step": 4300 }, { "epoch": 0.24744990718236273, "grad_norm": 1.6796875, "learning_rate": 9.771642602399229e-05, "loss": 7.1422, "step": 4310 }, { "epoch": 0.24802403689740302, "grad_norm": 1.6875, "learning_rate": 9.77025161526071e-05, "loss": 7.0738, "step": 4320 }, { "epoch": 0.2485981666124433, "grad_norm": 1.5078125, "learning_rate": 9.76885650414713e-05, "loss": 7.0958, "step": 4330 }, { "epoch": 0.24917229632748358, "grad_norm": 1.640625, "learning_rate": 9.76745727026459e-05, "loss": 7.1058, "step": 4340 }, { "epoch": 0.24974642604252387, "grad_norm": 1.734375, "learning_rate": 9.766053914822754e-05, "loss": 7.1442, "step": 4350 }, { "epoch": 0.2503205557575642, "grad_norm": 1.71875, "learning_rate": 9.764646439034849e-05, "loss": 7.1123, "step": 4360 }, { "epoch": 0.25089468547260446, "grad_norm": 1.546875, "learning_rate": 9.763234844117666e-05, "loss": 7.1233, "step": 4370 }, { "epoch": 0.25146881518764475, "grad_norm": 1.7109375, "learning_rate": 9.761819131291557e-05, "loss": 7.1272, "step": 4380 }, { "epoch": 0.25204294490268503, "grad_norm": 1.6796875, "learning_rate": 9.760399301780433e-05, "loss": 7.0923, "step": 4390 }, { "epoch": 0.2526170746177253, "grad_norm": 1.5859375, "learning_rate": 9.758975356811763e-05, "loss": 7.1102, "step": 4400 }, { "epoch": 0.2531912043327656, "grad_norm": 1.6328125, "learning_rate": 9.757547297616576e-05, "loss": 7.1003, "step": 4410 }, { "epoch": 0.2537653340478059, "grad_norm": 1.6953125, "learning_rate": 9.756115125429457e-05, "loss": 7.1449, "step": 4420 }, { "epoch": 0.25433946376284616, "grad_norm": 1.6875, "learning_rate": 9.754678841488545e-05, "loss": 7.1108, "step": 4430 }, { "epoch": 0.25491359347788645, "grad_norm": 1.5625, "learning_rate": 9.753238447035537e-05, "loss": 7.1287, "step": 4440 }, { "epoch": 0.25548772319292673, "grad_norm": 1.703125, "learning_rate": 9.751793943315683e-05, "loss": 7.0953, "step": 4450 }, { "epoch": 0.256061852907967, "grad_norm": 1.5703125, "learning_rate": 9.750345331577781e-05, "loss": 7.1042, "step": 4460 }, { "epoch": 0.2566359826230073, "grad_norm": 1.6328125, "learning_rate": 9.748892613074188e-05, "loss": 7.1171, "step": 4470 }, { "epoch": 0.2572101123380476, "grad_norm": 1.6171875, "learning_rate": 9.747435789060804e-05, "loss": 7.1258, "step": 4480 }, { "epoch": 0.25778424205308786, "grad_norm": 1.71875, "learning_rate": 9.745974860797084e-05, "loss": 7.116, "step": 4490 }, { "epoch": 0.25835837176812815, "grad_norm": 1.640625, "learning_rate": 9.744509829546027e-05, "loss": 7.1304, "step": 4500 }, { "epoch": 0.25893250148316843, "grad_norm": 1.609375, "learning_rate": 9.743040696574182e-05, "loss": 7.1077, "step": 4510 }, { "epoch": 0.2595066311982087, "grad_norm": 1.6875, "learning_rate": 9.741567463151642e-05, "loss": 7.1422, "step": 4520 }, { "epoch": 0.260080760913249, "grad_norm": 1.5625, "learning_rate": 9.740090130552046e-05, "loss": 7.1365, "step": 4530 }, { "epoch": 0.2606548906282893, "grad_norm": 1.71875, "learning_rate": 9.73860870005258e-05, "loss": 7.0826, "step": 4540 }, { "epoch": 0.26122902034332957, "grad_norm": 1.578125, "learning_rate": 9.737123172933964e-05, "loss": 7.1016, "step": 4550 }, { "epoch": 0.26180315005836985, "grad_norm": 1.6640625, "learning_rate": 9.735633550480469e-05, "loss": 7.1019, "step": 4560 }, { "epoch": 0.26237727977341013, "grad_norm": 1.6171875, "learning_rate": 9.7341398339799e-05, "loss": 7.0796, "step": 4570 }, { "epoch": 0.2629514094884504, "grad_norm": 1.640625, "learning_rate": 9.732642024723605e-05, "loss": 7.1059, "step": 4580 }, { "epoch": 0.2635255392034907, "grad_norm": 1.578125, "learning_rate": 9.731140124006471e-05, "loss": 7.1171, "step": 4590 }, { "epoch": 0.264099668918531, "grad_norm": 1.59375, "learning_rate": 9.729634133126917e-05, "loss": 7.1195, "step": 4600 }, { "epoch": 0.26467379863357127, "grad_norm": 1.765625, "learning_rate": 9.728124053386905e-05, "loss": 7.1011, "step": 4610 }, { "epoch": 0.26524792834861155, "grad_norm": 1.71875, "learning_rate": 9.726609886091925e-05, "loss": 7.1241, "step": 4620 }, { "epoch": 0.26582205806365183, "grad_norm": 1.640625, "learning_rate": 9.725091632551002e-05, "loss": 7.1232, "step": 4630 }, { "epoch": 0.2663961877786921, "grad_norm": 1.65625, "learning_rate": 9.723569294076702e-05, "loss": 7.1256, "step": 4640 }, { "epoch": 0.2669703174937324, "grad_norm": 1.609375, "learning_rate": 9.722042871985112e-05, "loss": 7.1163, "step": 4650 }, { "epoch": 0.2675444472087727, "grad_norm": 1.5625, "learning_rate": 9.720512367595854e-05, "loss": 7.1269, "step": 4660 }, { "epoch": 0.26811857692381297, "grad_norm": 1.609375, "learning_rate": 9.718977782232079e-05, "loss": 7.1275, "step": 4670 }, { "epoch": 0.26869270663885325, "grad_norm": 1.5703125, "learning_rate": 9.717439117220468e-05, "loss": 7.0919, "step": 4680 }, { "epoch": 0.26926683635389353, "grad_norm": 1.6953125, "learning_rate": 9.715896373891222e-05, "loss": 7.1171, "step": 4690 }, { "epoch": 0.2698409660689338, "grad_norm": 1.6484375, "learning_rate": 9.714349553578077e-05, "loss": 7.0927, "step": 4700 }, { "epoch": 0.2704150957839741, "grad_norm": 1.625, "learning_rate": 9.712798657618287e-05, "loss": 7.1006, "step": 4710 }, { "epoch": 0.2709892254990144, "grad_norm": 1.625, "learning_rate": 9.711243687352632e-05, "loss": 7.0835, "step": 4720 }, { "epoch": 0.27156335521405467, "grad_norm": 1.6875, "learning_rate": 9.709684644125413e-05, "loss": 7.11, "step": 4730 }, { "epoch": 0.27213748492909495, "grad_norm": 1.6640625, "learning_rate": 9.708121529284455e-05, "loss": 7.1328, "step": 4740 }, { "epoch": 0.2727116146441353, "grad_norm": 1.59375, "learning_rate": 9.706554344181101e-05, "loss": 7.084, "step": 4750 }, { "epoch": 0.2732857443591756, "grad_norm": 1.625, "learning_rate": 9.70498309017021e-05, "loss": 7.1197, "step": 4760 }, { "epoch": 0.27385987407421586, "grad_norm": 1.625, "learning_rate": 9.703407768610164e-05, "loss": 7.1251, "step": 4770 }, { "epoch": 0.27443400378925614, "grad_norm": 1.703125, "learning_rate": 9.70182838086286e-05, "loss": 7.0986, "step": 4780 }, { "epoch": 0.2750081335042964, "grad_norm": 1.703125, "learning_rate": 9.700244928293708e-05, "loss": 7.1065, "step": 4790 }, { "epoch": 0.2755822632193367, "grad_norm": 1.703125, "learning_rate": 9.698657412271634e-05, "loss": 7.1378, "step": 4800 }, { "epoch": 0.276156392934377, "grad_norm": 1.640625, "learning_rate": 9.697065834169075e-05, "loss": 7.096, "step": 4810 }, { "epoch": 0.2767305226494173, "grad_norm": 1.6015625, "learning_rate": 9.695470195361982e-05, "loss": 7.105, "step": 4820 }, { "epoch": 0.27730465236445756, "grad_norm": 1.6171875, "learning_rate": 9.693870497229816e-05, "loss": 7.1321, "step": 4830 }, { "epoch": 0.27787878207949784, "grad_norm": 1.625, "learning_rate": 9.692266741155547e-05, "loss": 7.104, "step": 4840 }, { "epoch": 0.2784529117945381, "grad_norm": 1.65625, "learning_rate": 9.690658928525653e-05, "loss": 7.1017, "step": 4850 }, { "epoch": 0.2790270415095784, "grad_norm": 1.6484375, "learning_rate": 9.689047060730119e-05, "loss": 7.1238, "step": 4860 }, { "epoch": 0.2796011712246187, "grad_norm": 1.7109375, "learning_rate": 9.687431139162437e-05, "loss": 7.0816, "step": 4870 }, { "epoch": 0.280175300939659, "grad_norm": 1.5859375, "learning_rate": 9.6858111652196e-05, "loss": 7.0878, "step": 4880 }, { "epoch": 0.28074943065469926, "grad_norm": 1.6640625, "learning_rate": 9.684187140302113e-05, "loss": 7.0907, "step": 4890 }, { "epoch": 0.28132356036973954, "grad_norm": 1.8203125, "learning_rate": 9.682559065813969e-05, "loss": 7.1139, "step": 4900 }, { "epoch": 0.28189769008477983, "grad_norm": 1.6796875, "learning_rate": 9.680926943162674e-05, "loss": 7.1432, "step": 4910 }, { "epoch": 0.2824718197998201, "grad_norm": 1.6484375, "learning_rate": 9.67929077375923e-05, "loss": 7.1069, "step": 4920 }, { "epoch": 0.2830459495148604, "grad_norm": 1.6328125, "learning_rate": 9.677650559018137e-05, "loss": 7.1006, "step": 4930 }, { "epoch": 0.2836200792299007, "grad_norm": 1.578125, "learning_rate": 9.676006300357392e-05, "loss": 7.1214, "step": 4940 }, { "epoch": 0.28419420894494096, "grad_norm": 1.5859375, "learning_rate": 9.674357999198489e-05, "loss": 7.1161, "step": 4950 }, { "epoch": 0.28476833865998125, "grad_norm": 1.625, "learning_rate": 9.672705656966417e-05, "loss": 7.0781, "step": 4960 }, { "epoch": 0.28534246837502153, "grad_norm": 1.625, "learning_rate": 9.671049275089654e-05, "loss": 7.0649, "step": 4970 }, { "epoch": 0.2859165980900618, "grad_norm": 1.703125, "learning_rate": 9.669388855000178e-05, "loss": 7.0862, "step": 4980 }, { "epoch": 0.2864907278051021, "grad_norm": 1.6015625, "learning_rate": 9.667724398133455e-05, "loss": 7.1193, "step": 4990 }, { "epoch": 0.2870648575201424, "grad_norm": 1.6171875, "learning_rate": 9.666055905928437e-05, "loss": 7.1017, "step": 5000 }, { "epoch": 0.28763898723518266, "grad_norm": 1.671875, "learning_rate": 9.66438337982757e-05, "loss": 7.0887, "step": 5010 }, { "epoch": 0.28821311695022295, "grad_norm": 1.640625, "learning_rate": 9.662706821276787e-05, "loss": 7.135, "step": 5020 }, { "epoch": 0.28878724666526323, "grad_norm": 1.5859375, "learning_rate": 9.6610262317255e-05, "loss": 7.1089, "step": 5030 }, { "epoch": 0.2893613763803035, "grad_norm": 1.6171875, "learning_rate": 9.659341612626618e-05, "loss": 7.0617, "step": 5040 }, { "epoch": 0.2899355060953438, "grad_norm": 1.6953125, "learning_rate": 9.657652965436521e-05, "loss": 7.1023, "step": 5050 }, { "epoch": 0.2905096358103841, "grad_norm": 1.6328125, "learning_rate": 9.655960291615081e-05, "loss": 7.0837, "step": 5060 }, { "epoch": 0.29108376552542436, "grad_norm": 1.6171875, "learning_rate": 9.654263592625645e-05, "loss": 7.0944, "step": 5070 }, { "epoch": 0.29165789524046465, "grad_norm": 1.609375, "learning_rate": 9.652562869935045e-05, "loss": 7.0902, "step": 5080 }, { "epoch": 0.29223202495550493, "grad_norm": 1.640625, "learning_rate": 9.650858125013584e-05, "loss": 7.0896, "step": 5090 }, { "epoch": 0.2928061546705452, "grad_norm": 1.6171875, "learning_rate": 9.649149359335053e-05, "loss": 7.0806, "step": 5100 }, { "epoch": 0.2933802843855855, "grad_norm": 1.6875, "learning_rate": 9.647436574376708e-05, "loss": 7.1028, "step": 5110 }, { "epoch": 0.2939544141006258, "grad_norm": 1.5859375, "learning_rate": 9.645719771619288e-05, "loss": 7.0584, "step": 5120 }, { "epoch": 0.29452854381566607, "grad_norm": 1.6015625, "learning_rate": 9.643998952547002e-05, "loss": 7.0907, "step": 5130 }, { "epoch": 0.29510267353070635, "grad_norm": 1.7265625, "learning_rate": 9.642274118647529e-05, "loss": 7.1007, "step": 5140 }, { "epoch": 0.29567680324574663, "grad_norm": 1.6328125, "learning_rate": 9.640545271412024e-05, "loss": 7.1318, "step": 5150 }, { "epoch": 0.2962509329607869, "grad_norm": 1.6796875, "learning_rate": 9.638812412335108e-05, "loss": 7.1222, "step": 5160 }, { "epoch": 0.2968250626758272, "grad_norm": 1.625, "learning_rate": 9.63707554291487e-05, "loss": 7.0565, "step": 5170 }, { "epoch": 0.2973991923908675, "grad_norm": 1.6015625, "learning_rate": 9.63533466465287e-05, "loss": 7.0986, "step": 5180 }, { "epoch": 0.29797332210590777, "grad_norm": 1.546875, "learning_rate": 9.633589779054131e-05, "loss": 7.0853, "step": 5190 }, { "epoch": 0.2985474518209481, "grad_norm": 1.6875, "learning_rate": 9.631840887627138e-05, "loss": 7.1219, "step": 5200 }, { "epoch": 0.2991215815359884, "grad_norm": 1.6328125, "learning_rate": 9.630087991883843e-05, "loss": 7.0556, "step": 5210 }, { "epoch": 0.2996957112510287, "grad_norm": 1.5625, "learning_rate": 9.628331093339657e-05, "loss": 7.0984, "step": 5220 }, { "epoch": 0.30026984096606896, "grad_norm": 1.6953125, "learning_rate": 9.626570193513456e-05, "loss": 7.0877, "step": 5230 }, { "epoch": 0.30084397068110924, "grad_norm": 1.6328125, "learning_rate": 9.624805293927568e-05, "loss": 7.1067, "step": 5240 }, { "epoch": 0.3014181003961495, "grad_norm": 1.5625, "learning_rate": 9.623036396107785e-05, "loss": 7.1021, "step": 5250 }, { "epoch": 0.3019922301111898, "grad_norm": 1.6484375, "learning_rate": 9.621263501583356e-05, "loss": 7.1186, "step": 5260 }, { "epoch": 0.3025663598262301, "grad_norm": 1.5, "learning_rate": 9.619486611886976e-05, "loss": 7.0824, "step": 5270 }, { "epoch": 0.3031404895412704, "grad_norm": 1.6015625, "learning_rate": 9.617705728554807e-05, "loss": 7.1194, "step": 5280 }, { "epoch": 0.30371461925631066, "grad_norm": 1.671875, "learning_rate": 9.615920853126456e-05, "loss": 7.1096, "step": 5290 }, { "epoch": 0.30428874897135094, "grad_norm": 1.7265625, "learning_rate": 9.61413198714498e-05, "loss": 7.1142, "step": 5300 }, { "epoch": 0.3048628786863912, "grad_norm": 1.5703125, "learning_rate": 9.612339132156889e-05, "loss": 7.0574, "step": 5310 }, { "epoch": 0.3054370084014315, "grad_norm": 1.609375, "learning_rate": 9.610542289712143e-05, "loss": 7.1176, "step": 5320 }, { "epoch": 0.3060111381164718, "grad_norm": 1.7421875, "learning_rate": 9.608741461364145e-05, "loss": 7.09, "step": 5330 }, { "epoch": 0.3065852678315121, "grad_norm": 1.71875, "learning_rate": 9.60693664866975e-05, "loss": 7.1117, "step": 5340 }, { "epoch": 0.30715939754655236, "grad_norm": 1.6953125, "learning_rate": 9.605127853189246e-05, "loss": 7.0935, "step": 5350 }, { "epoch": 0.30773352726159264, "grad_norm": 1.5625, "learning_rate": 9.603315076486378e-05, "loss": 7.0575, "step": 5360 }, { "epoch": 0.3083076569766329, "grad_norm": 1.625, "learning_rate": 9.601498320128324e-05, "loss": 7.0817, "step": 5370 }, { "epoch": 0.3088817866916732, "grad_norm": 1.640625, "learning_rate": 9.599677585685707e-05, "loss": 7.0864, "step": 5380 }, { "epoch": 0.3094559164067135, "grad_norm": 1.640625, "learning_rate": 9.597852874732585e-05, "loss": 7.1067, "step": 5390 }, { "epoch": 0.3100300461217538, "grad_norm": 1.6796875, "learning_rate": 9.596024188846459e-05, "loss": 7.1235, "step": 5400 }, { "epoch": 0.31060417583679406, "grad_norm": 1.6875, "learning_rate": 9.59419152960826e-05, "loss": 7.1188, "step": 5410 }, { "epoch": 0.31117830555183434, "grad_norm": 1.6015625, "learning_rate": 9.59235489860236e-05, "loss": 7.1008, "step": 5420 }, { "epoch": 0.31175243526687463, "grad_norm": 1.6171875, "learning_rate": 9.590514297416561e-05, "loss": 7.0965, "step": 5430 }, { "epoch": 0.3123265649819149, "grad_norm": 1.6953125, "learning_rate": 9.588669727642099e-05, "loss": 7.1121, "step": 5440 }, { "epoch": 0.3129006946969552, "grad_norm": 1.703125, "learning_rate": 9.586821190873639e-05, "loss": 7.1177, "step": 5450 }, { "epoch": 0.3134748244119955, "grad_norm": 1.609375, "learning_rate": 9.584968688709279e-05, "loss": 7.1023, "step": 5460 }, { "epoch": 0.31404895412703576, "grad_norm": 1.6171875, "learning_rate": 9.58311222275054e-05, "loss": 7.1225, "step": 5470 }, { "epoch": 0.31462308384207605, "grad_norm": 1.6953125, "learning_rate": 9.581251794602377e-05, "loss": 7.0741, "step": 5480 }, { "epoch": 0.31519721355711633, "grad_norm": 1.640625, "learning_rate": 9.579387405873164e-05, "loss": 7.1023, "step": 5490 }, { "epoch": 0.3157713432721566, "grad_norm": 1.6015625, "learning_rate": 9.5775190581747e-05, "loss": 7.1073, "step": 5500 }, { "epoch": 0.3163454729871969, "grad_norm": 1.6171875, "learning_rate": 9.57564675312221e-05, "loss": 7.0511, "step": 5510 }, { "epoch": 0.3169196027022372, "grad_norm": 1.6171875, "learning_rate": 9.573770492334338e-05, "loss": 7.0816, "step": 5520 }, { "epoch": 0.31749373241727746, "grad_norm": 1.71875, "learning_rate": 9.571890277433144e-05, "loss": 7.1016, "step": 5530 }, { "epoch": 0.31806786213231775, "grad_norm": 1.65625, "learning_rate": 9.570006110044116e-05, "loss": 7.0907, "step": 5540 }, { "epoch": 0.31864199184735803, "grad_norm": 1.6484375, "learning_rate": 9.568117991796148e-05, "loss": 7.078, "step": 5550 }, { "epoch": 0.3192161215623983, "grad_norm": 1.6328125, "learning_rate": 9.56622592432156e-05, "loss": 7.0899, "step": 5560 }, { "epoch": 0.3197902512774386, "grad_norm": 1.6484375, "learning_rate": 9.564329909256078e-05, "loss": 7.1083, "step": 5570 }, { "epoch": 0.3203643809924789, "grad_norm": 1.6015625, "learning_rate": 9.562429948238842e-05, "loss": 7.0546, "step": 5580 }, { "epoch": 0.32093851070751916, "grad_norm": 1.59375, "learning_rate": 9.56052604291241e-05, "loss": 7.0755, "step": 5590 }, { "epoch": 0.32151264042255945, "grad_norm": 1.5390625, "learning_rate": 9.55861819492274e-05, "loss": 7.0788, "step": 5600 }, { "epoch": 0.32208677013759973, "grad_norm": 1.6640625, "learning_rate": 9.556706405919208e-05, "loss": 7.0758, "step": 5610 }, { "epoch": 0.32266089985264, "grad_norm": 1.5859375, "learning_rate": 9.55479067755459e-05, "loss": 7.0838, "step": 5620 }, { "epoch": 0.3232350295676803, "grad_norm": 1.5703125, "learning_rate": 9.552871011485071e-05, "loss": 7.0924, "step": 5630 }, { "epoch": 0.3238091592827206, "grad_norm": 1.625, "learning_rate": 9.550947409370239e-05, "loss": 7.0698, "step": 5640 }, { "epoch": 0.3243832889977609, "grad_norm": 1.6640625, "learning_rate": 9.549019872873087e-05, "loss": 7.0464, "step": 5650 }, { "epoch": 0.3249574187128012, "grad_norm": 1.546875, "learning_rate": 9.547088403660005e-05, "loss": 7.0699, "step": 5660 }, { "epoch": 0.3255315484278415, "grad_norm": 1.5859375, "learning_rate": 9.545153003400789e-05, "loss": 7.0841, "step": 5670 }, { "epoch": 0.32610567814288177, "grad_norm": 1.59375, "learning_rate": 9.543213673768627e-05, "loss": 7.0842, "step": 5680 }, { "epoch": 0.32667980785792206, "grad_norm": 1.6875, "learning_rate": 9.541270416440109e-05, "loss": 7.0989, "step": 5690 }, { "epoch": 0.32725393757296234, "grad_norm": 1.578125, "learning_rate": 9.539323233095219e-05, "loss": 7.0961, "step": 5700 }, { "epoch": 0.3278280672880026, "grad_norm": 1.59375, "learning_rate": 9.537372125417333e-05, "loss": 7.0765, "step": 5710 }, { "epoch": 0.3284021970030429, "grad_norm": 1.6171875, "learning_rate": 9.535417095093222e-05, "loss": 7.0622, "step": 5720 }, { "epoch": 0.3289763267180832, "grad_norm": 1.6484375, "learning_rate": 9.533458143813048e-05, "loss": 7.0771, "step": 5730 }, { "epoch": 0.3295504564331235, "grad_norm": 1.6484375, "learning_rate": 9.531495273270363e-05, "loss": 7.07, "step": 5740 }, { "epoch": 0.33012458614816376, "grad_norm": 1.609375, "learning_rate": 9.529528485162105e-05, "loss": 7.0796, "step": 5750 }, { "epoch": 0.33069871586320404, "grad_norm": 1.7109375, "learning_rate": 9.527557781188602e-05, "loss": 7.1156, "step": 5760 }, { "epoch": 0.3312728455782443, "grad_norm": 1.6875, "learning_rate": 9.525583163053566e-05, "loss": 7.0224, "step": 5770 }, { "epoch": 0.3318469752932846, "grad_norm": 1.6640625, "learning_rate": 9.523604632464092e-05, "loss": 7.0701, "step": 5780 }, { "epoch": 0.3324211050083249, "grad_norm": 1.6015625, "learning_rate": 9.521622191130656e-05, "loss": 7.0623, "step": 5790 }, { "epoch": 0.3329952347233652, "grad_norm": 1.5703125, "learning_rate": 9.51963584076712e-05, "loss": 7.1004, "step": 5800 }, { "epoch": 0.33356936443840546, "grad_norm": 1.671875, "learning_rate": 9.517645583090722e-05, "loss": 7.0978, "step": 5810 }, { "epoch": 0.33414349415344574, "grad_norm": 1.5703125, "learning_rate": 9.515651419822077e-05, "loss": 7.0835, "step": 5820 }, { "epoch": 0.334717623868486, "grad_norm": 1.6171875, "learning_rate": 9.513653352685179e-05, "loss": 7.0948, "step": 5830 }, { "epoch": 0.3352917535835263, "grad_norm": 1.6015625, "learning_rate": 9.511651383407395e-05, "loss": 7.0186, "step": 5840 }, { "epoch": 0.3358658832985666, "grad_norm": 1.6328125, "learning_rate": 9.509645513719467e-05, "loss": 7.0853, "step": 5850 }, { "epoch": 0.3364400130136069, "grad_norm": 1.5859375, "learning_rate": 9.507635745355509e-05, "loss": 7.0859, "step": 5860 }, { "epoch": 0.33701414272864716, "grad_norm": 1.7265625, "learning_rate": 9.505622080053003e-05, "loss": 7.0422, "step": 5870 }, { "epoch": 0.33758827244368744, "grad_norm": 1.6328125, "learning_rate": 9.503604519552803e-05, "loss": 7.0574, "step": 5880 }, { "epoch": 0.3381624021587277, "grad_norm": 1.5390625, "learning_rate": 9.501583065599127e-05, "loss": 7.062, "step": 5890 }, { "epoch": 0.338736531873768, "grad_norm": 1.53125, "learning_rate": 9.499557719939564e-05, "loss": 7.0634, "step": 5900 }, { "epoch": 0.3393106615888083, "grad_norm": 1.6171875, "learning_rate": 9.497528484325062e-05, "loss": 7.0739, "step": 5910 }, { "epoch": 0.3398847913038486, "grad_norm": 1.6953125, "learning_rate": 9.495495360509937e-05, "loss": 7.0782, "step": 5920 }, { "epoch": 0.34045892101888886, "grad_norm": 1.71875, "learning_rate": 9.493458350251862e-05, "loss": 7.0697, "step": 5930 }, { "epoch": 0.34103305073392914, "grad_norm": 1.671875, "learning_rate": 9.491417455311875e-05, "loss": 7.0745, "step": 5940 }, { "epoch": 0.3416071804489694, "grad_norm": 1.578125, "learning_rate": 9.489372677454365e-05, "loss": 7.0831, "step": 5950 }, { "epoch": 0.3421813101640097, "grad_norm": 1.7109375, "learning_rate": 9.487324018447086e-05, "loss": 7.0725, "step": 5960 }, { "epoch": 0.34275543987905, "grad_norm": 1.546875, "learning_rate": 9.48527148006114e-05, "loss": 7.103, "step": 5970 }, { "epoch": 0.3433295695940903, "grad_norm": 1.5859375, "learning_rate": 9.48321506407099e-05, "loss": 7.0543, "step": 5980 }, { "epoch": 0.34390369930913056, "grad_norm": 1.640625, "learning_rate": 9.481154772254444e-05, "loss": 7.1007, "step": 5990 }, { "epoch": 0.34447782902417085, "grad_norm": 1.7109375, "learning_rate": 9.479090606392664e-05, "loss": 7.0926, "step": 6000 }, { "epoch": 0.34505195873921113, "grad_norm": 1.6875, "learning_rate": 9.477022568270166e-05, "loss": 7.0797, "step": 6010 }, { "epoch": 0.3456260884542514, "grad_norm": 1.671875, "learning_rate": 9.474950659674804e-05, "loss": 7.0643, "step": 6020 }, { "epoch": 0.3462002181692917, "grad_norm": 1.59375, "learning_rate": 9.472874882397786e-05, "loss": 7.0753, "step": 6030 }, { "epoch": 0.346774347884332, "grad_norm": 1.6796875, "learning_rate": 9.470795238233662e-05, "loss": 7.0945, "step": 6040 }, { "epoch": 0.34734847759937226, "grad_norm": 1.6875, "learning_rate": 9.468711728980323e-05, "loss": 7.055, "step": 6050 }, { "epoch": 0.34792260731441255, "grad_norm": 1.5703125, "learning_rate": 9.466624356439004e-05, "loss": 7.0759, "step": 6060 }, { "epoch": 0.34849673702945283, "grad_norm": 1.5859375, "learning_rate": 9.46453312241428e-05, "loss": 7.0866, "step": 6070 }, { "epoch": 0.3490708667444931, "grad_norm": 1.640625, "learning_rate": 9.462438028714061e-05, "loss": 7.0849, "step": 6080 }, { "epoch": 0.3496449964595334, "grad_norm": 1.6328125, "learning_rate": 9.460339077149597e-05, "loss": 7.101, "step": 6090 }, { "epoch": 0.35021912617457374, "grad_norm": 1.640625, "learning_rate": 9.458236269535476e-05, "loss": 7.0593, "step": 6100 }, { "epoch": 0.350793255889614, "grad_norm": 1.625, "learning_rate": 9.45612960768961e-05, "loss": 7.0364, "step": 6110 }, { "epoch": 0.3513673856046543, "grad_norm": 1.609375, "learning_rate": 9.454019093433253e-05, "loss": 7.0575, "step": 6120 }, { "epoch": 0.3519415153196946, "grad_norm": 1.5, "learning_rate": 9.451904728590983e-05, "loss": 7.1114, "step": 6130 }, { "epoch": 0.35251564503473487, "grad_norm": 1.6484375, "learning_rate": 9.449786514990713e-05, "loss": 7.0604, "step": 6140 }, { "epoch": 0.35308977474977515, "grad_norm": 1.546875, "learning_rate": 9.447664454463677e-05, "loss": 7.0818, "step": 6150 }, { "epoch": 0.35366390446481544, "grad_norm": 1.5546875, "learning_rate": 9.445538548844436e-05, "loss": 7.0759, "step": 6160 }, { "epoch": 0.3542380341798557, "grad_norm": 1.6171875, "learning_rate": 9.443408799970884e-05, "loss": 7.0768, "step": 6170 }, { "epoch": 0.354812163894896, "grad_norm": 1.6171875, "learning_rate": 9.441275209684219e-05, "loss": 7.0732, "step": 6180 }, { "epoch": 0.3553862936099363, "grad_norm": 1.6328125, "learning_rate": 9.43913777982898e-05, "loss": 7.0547, "step": 6190 }, { "epoch": 0.35596042332497657, "grad_norm": 1.640625, "learning_rate": 9.436996512253013e-05, "loss": 7.0371, "step": 6200 }, { "epoch": 0.35653455304001685, "grad_norm": 1.6328125, "learning_rate": 9.434851408807487e-05, "loss": 7.0853, "step": 6210 }, { "epoch": 0.35710868275505714, "grad_norm": 1.640625, "learning_rate": 9.432702471346884e-05, "loss": 7.0577, "step": 6220 }, { "epoch": 0.3576828124700974, "grad_norm": 1.671875, "learning_rate": 9.430549701729004e-05, "loss": 7.066, "step": 6230 }, { "epoch": 0.3582569421851377, "grad_norm": 1.625, "learning_rate": 9.428393101814954e-05, "loss": 7.0415, "step": 6240 }, { "epoch": 0.358831071900178, "grad_norm": 1.625, "learning_rate": 9.426232673469162e-05, "loss": 7.0349, "step": 6250 }, { "epoch": 0.3594052016152183, "grad_norm": 1.5546875, "learning_rate": 9.424068418559356e-05, "loss": 7.0604, "step": 6260 }, { "epoch": 0.35997933133025856, "grad_norm": 1.609375, "learning_rate": 9.421900338956578e-05, "loss": 7.0384, "step": 6270 }, { "epoch": 0.36055346104529884, "grad_norm": 1.6015625, "learning_rate": 9.419728436535176e-05, "loss": 7.0879, "step": 6280 }, { "epoch": 0.3611275907603391, "grad_norm": 1.59375, "learning_rate": 9.4175527131728e-05, "loss": 7.0746, "step": 6290 }, { "epoch": 0.3617017204753794, "grad_norm": 1.515625, "learning_rate": 9.415373170750404e-05, "loss": 7.0225, "step": 6300 }, { "epoch": 0.3622758501904197, "grad_norm": 1.5625, "learning_rate": 9.413189811152247e-05, "loss": 7.0722, "step": 6310 }, { "epoch": 0.36284997990546, "grad_norm": 1.6015625, "learning_rate": 9.411002636265886e-05, "loss": 7.0454, "step": 6320 }, { "epoch": 0.36342410962050026, "grad_norm": 1.7578125, "learning_rate": 9.408811647982176e-05, "loss": 7.0908, "step": 6330 }, { "epoch": 0.36399823933554054, "grad_norm": 1.6953125, "learning_rate": 9.406616848195266e-05, "loss": 7.0544, "step": 6340 }, { "epoch": 0.3645723690505808, "grad_norm": 1.7734375, "learning_rate": 9.404418238802606e-05, "loss": 7.0017, "step": 6350 }, { "epoch": 0.3651464987656211, "grad_norm": 1.609375, "learning_rate": 9.402215821704935e-05, "loss": 7.0659, "step": 6360 }, { "epoch": 0.3657206284806614, "grad_norm": 1.5546875, "learning_rate": 9.400009598806287e-05, "loss": 7.0969, "step": 6370 }, { "epoch": 0.3662947581957017, "grad_norm": 1.5625, "learning_rate": 9.397799572013982e-05, "loss": 7.028, "step": 6380 }, { "epoch": 0.36686888791074196, "grad_norm": 1.6171875, "learning_rate": 9.395585743238633e-05, "loss": 7.0821, "step": 6390 }, { "epoch": 0.36744301762578224, "grad_norm": 1.7109375, "learning_rate": 9.393368114394136e-05, "loss": 7.0553, "step": 6400 }, { "epoch": 0.3680171473408225, "grad_norm": 1.6484375, "learning_rate": 9.391146687397676e-05, "loss": 7.0522, "step": 6410 }, { "epoch": 0.3685912770558628, "grad_norm": 1.6796875, "learning_rate": 9.388921464169719e-05, "loss": 7.017, "step": 6420 }, { "epoch": 0.3691654067709031, "grad_norm": 1.578125, "learning_rate": 9.386692446634016e-05, "loss": 7.0541, "step": 6430 }, { "epoch": 0.3697395364859434, "grad_norm": 1.6484375, "learning_rate": 9.38445963671759e-05, "loss": 7.069, "step": 6440 }, { "epoch": 0.37031366620098366, "grad_norm": 1.640625, "learning_rate": 9.382223036350755e-05, "loss": 7.0844, "step": 6450 }, { "epoch": 0.37088779591602394, "grad_norm": 1.671875, "learning_rate": 9.379982647467091e-05, "loss": 7.0528, "step": 6460 }, { "epoch": 0.3714619256310642, "grad_norm": 1.640625, "learning_rate": 9.37773847200346e-05, "loss": 7.0136, "step": 6470 }, { "epoch": 0.3720360553461045, "grad_norm": 1.6484375, "learning_rate": 9.375490511899994e-05, "loss": 7.0743, "step": 6480 }, { "epoch": 0.3726101850611448, "grad_norm": 1.59375, "learning_rate": 9.373238769100098e-05, "loss": 7.0748, "step": 6490 }, { "epoch": 0.3731843147761851, "grad_norm": 1.59375, "learning_rate": 9.370983245550449e-05, "loss": 7.0398, "step": 6500 }, { "epoch": 0.37375844449122536, "grad_norm": 1.5390625, "learning_rate": 9.36872394320099e-05, "loss": 7.0709, "step": 6510 }, { "epoch": 0.37433257420626564, "grad_norm": 1.5546875, "learning_rate": 9.36646086400493e-05, "loss": 7.0296, "step": 6520 }, { "epoch": 0.37490670392130593, "grad_norm": 1.671875, "learning_rate": 9.36419400991875e-05, "loss": 7.0657, "step": 6530 }, { "epoch": 0.3754808336363462, "grad_norm": 1.6484375, "learning_rate": 9.361923382902182e-05, "loss": 7.0604, "step": 6540 }, { "epoch": 0.37605496335138655, "grad_norm": 1.6171875, "learning_rate": 9.359648984918232e-05, "loss": 7.0412, "step": 6550 }, { "epoch": 0.37662909306642683, "grad_norm": 1.734375, "learning_rate": 9.35737081793316e-05, "loss": 7.0582, "step": 6560 }, { "epoch": 0.3772032227814671, "grad_norm": 1.6484375, "learning_rate": 9.355088883916485e-05, "loss": 7.0994, "step": 6570 }, { "epoch": 0.3777773524965074, "grad_norm": 1.6015625, "learning_rate": 9.352803184840983e-05, "loss": 7.0822, "step": 6580 }, { "epoch": 0.3783514822115477, "grad_norm": 1.484375, "learning_rate": 9.350513722682687e-05, "loss": 7.0721, "step": 6590 }, { "epoch": 0.37892561192658797, "grad_norm": 1.59375, "learning_rate": 9.34822049942088e-05, "loss": 7.0822, "step": 6600 }, { "epoch": 0.37949974164162825, "grad_norm": 1.609375, "learning_rate": 9.3459235170381e-05, "loss": 7.0448, "step": 6610 }, { "epoch": 0.38007387135666854, "grad_norm": 1.625, "learning_rate": 9.343622777520129e-05, "loss": 7.037, "step": 6620 }, { "epoch": 0.3806480010717088, "grad_norm": 1.6328125, "learning_rate": 9.341318282856004e-05, "loss": 7.0463, "step": 6630 }, { "epoch": 0.3812221307867491, "grad_norm": 1.6953125, "learning_rate": 9.339010035038005e-05, "loss": 7.0267, "step": 6640 }, { "epoch": 0.3817962605017894, "grad_norm": 1.625, "learning_rate": 9.336698036061657e-05, "loss": 7.0255, "step": 6650 }, { "epoch": 0.38237039021682967, "grad_norm": 1.609375, "learning_rate": 9.334382287925726e-05, "loss": 7.0543, "step": 6660 }, { "epoch": 0.38294451993186995, "grad_norm": 1.609375, "learning_rate": 9.332062792632223e-05, "loss": 7.0655, "step": 6670 }, { "epoch": 0.38351864964691024, "grad_norm": 1.546875, "learning_rate": 9.329739552186396e-05, "loss": 7.012, "step": 6680 }, { "epoch": 0.3840927793619505, "grad_norm": 1.609375, "learning_rate": 9.327412568596735e-05, "loss": 7.0628, "step": 6690 }, { "epoch": 0.3846669090769908, "grad_norm": 1.6015625, "learning_rate": 9.325081843874954e-05, "loss": 7.0552, "step": 6700 }, { "epoch": 0.3852410387920311, "grad_norm": 1.6875, "learning_rate": 9.322747380036019e-05, "loss": 7.0691, "step": 6710 }, { "epoch": 0.38581516850707137, "grad_norm": 1.65625, "learning_rate": 9.320409179098113e-05, "loss": 7.0627, "step": 6720 }, { "epoch": 0.38638929822211165, "grad_norm": 1.6171875, "learning_rate": 9.31806724308266e-05, "loss": 7.0231, "step": 6730 }, { "epoch": 0.38696342793715194, "grad_norm": 1.6328125, "learning_rate": 9.315721574014307e-05, "loss": 7.0489, "step": 6740 }, { "epoch": 0.3875375576521922, "grad_norm": 1.6171875, "learning_rate": 9.31337217392093e-05, "loss": 7.0758, "step": 6750 }, { "epoch": 0.3881116873672325, "grad_norm": 1.6015625, "learning_rate": 9.311019044833631e-05, "loss": 7.0796, "step": 6760 }, { "epoch": 0.3886858170822728, "grad_norm": 1.671875, "learning_rate": 9.308662188786738e-05, "loss": 7.0804, "step": 6770 }, { "epoch": 0.38925994679731307, "grad_norm": 1.546875, "learning_rate": 9.306301607817797e-05, "loss": 7.0535, "step": 6780 }, { "epoch": 0.38983407651235336, "grad_norm": 1.5625, "learning_rate": 9.303937303967578e-05, "loss": 7.0569, "step": 6790 }, { "epoch": 0.39040820622739364, "grad_norm": 1.5546875, "learning_rate": 9.301569279280063e-05, "loss": 7.0705, "step": 6800 }, { "epoch": 0.3909823359424339, "grad_norm": 1.5546875, "learning_rate": 9.29919753580246e-05, "loss": 7.0721, "step": 6810 }, { "epoch": 0.3915564656574742, "grad_norm": 1.6484375, "learning_rate": 9.296822075585185e-05, "loss": 7.1056, "step": 6820 }, { "epoch": 0.3921305953725145, "grad_norm": 1.6171875, "learning_rate": 9.29444290068187e-05, "loss": 7.0806, "step": 6830 }, { "epoch": 0.3927047250875548, "grad_norm": 1.65625, "learning_rate": 9.292060013149357e-05, "loss": 6.9807, "step": 6840 }, { "epoch": 0.39327885480259506, "grad_norm": 1.6171875, "learning_rate": 9.289673415047701e-05, "loss": 7.0552, "step": 6850 }, { "epoch": 0.39385298451763534, "grad_norm": 1.6796875, "learning_rate": 9.287283108440159e-05, "loss": 7.0737, "step": 6860 }, { "epoch": 0.3944271142326756, "grad_norm": 1.65625, "learning_rate": 9.2848890953932e-05, "loss": 7.0574, "step": 6870 }, { "epoch": 0.3950012439477159, "grad_norm": 1.6796875, "learning_rate": 9.282491377976494e-05, "loss": 7.0671, "step": 6880 }, { "epoch": 0.3955753736627562, "grad_norm": 1.6953125, "learning_rate": 9.280089958262912e-05, "loss": 6.9983, "step": 6890 }, { "epoch": 0.3961495033777965, "grad_norm": 1.609375, "learning_rate": 9.277684838328532e-05, "loss": 7.0531, "step": 6900 }, { "epoch": 0.39672363309283676, "grad_norm": 1.6015625, "learning_rate": 9.275276020252624e-05, "loss": 7.0274, "step": 6910 }, { "epoch": 0.39729776280787704, "grad_norm": 1.640625, "learning_rate": 9.272863506117659e-05, "loss": 7.0561, "step": 6920 }, { "epoch": 0.3978718925229173, "grad_norm": 1.671875, "learning_rate": 9.270447298009301e-05, "loss": 7.0595, "step": 6930 }, { "epoch": 0.3984460222379576, "grad_norm": 1.5703125, "learning_rate": 9.26802739801641e-05, "loss": 7.0799, "step": 6940 }, { "epoch": 0.3990201519529979, "grad_norm": 1.578125, "learning_rate": 9.265603808231038e-05, "loss": 7.0158, "step": 6950 }, { "epoch": 0.3995942816680382, "grad_norm": 1.765625, "learning_rate": 9.263176530748422e-05, "loss": 7.0851, "step": 6960 }, { "epoch": 0.40016841138307846, "grad_norm": 1.75, "learning_rate": 9.260745567666992e-05, "loss": 7.0673, "step": 6970 }, { "epoch": 0.40074254109811874, "grad_norm": 1.65625, "learning_rate": 9.258310921088363e-05, "loss": 7.0224, "step": 6980 }, { "epoch": 0.401316670813159, "grad_norm": 1.6171875, "learning_rate": 9.255872593117334e-05, "loss": 7.0413, "step": 6990 }, { "epoch": 0.4018908005281993, "grad_norm": 1.640625, "learning_rate": 9.253430585861887e-05, "loss": 7.0657, "step": 7000 }, { "epoch": 0.40246493024323965, "grad_norm": 1.6640625, "learning_rate": 9.250984901433185e-05, "loss": 6.9849, "step": 7010 }, { "epoch": 0.40303905995827993, "grad_norm": 1.7265625, "learning_rate": 9.248535541945569e-05, "loss": 7.05, "step": 7020 }, { "epoch": 0.4036131896733202, "grad_norm": 1.6015625, "learning_rate": 9.246082509516558e-05, "loss": 7.0623, "step": 7030 }, { "epoch": 0.4041873193883605, "grad_norm": 1.625, "learning_rate": 9.243625806266845e-05, "loss": 7.0092, "step": 7040 }, { "epoch": 0.4047614491034008, "grad_norm": 1.6328125, "learning_rate": 9.2411654343203e-05, "loss": 7.0477, "step": 7050 }, { "epoch": 0.40533557881844107, "grad_norm": 1.6328125, "learning_rate": 9.238701395803962e-05, "loss": 7.0425, "step": 7060 }, { "epoch": 0.40590970853348135, "grad_norm": 1.5703125, "learning_rate": 9.236233692848035e-05, "loss": 7.0353, "step": 7070 }, { "epoch": 0.40648383824852163, "grad_norm": 1.6171875, "learning_rate": 9.233762327585905e-05, "loss": 7.0285, "step": 7080 }, { "epoch": 0.4070579679635619, "grad_norm": 1.625, "learning_rate": 9.231287302154107e-05, "loss": 7.0415, "step": 7090 }, { "epoch": 0.4076320976786022, "grad_norm": 1.640625, "learning_rate": 9.228808618692353e-05, "loss": 7.0454, "step": 7100 }, { "epoch": 0.4082062273936425, "grad_norm": 1.671875, "learning_rate": 9.226326279343512e-05, "loss": 7.0603, "step": 7110 }, { "epoch": 0.40878035710868277, "grad_norm": 1.5859375, "learning_rate": 9.223840286253613e-05, "loss": 7.0359, "step": 7120 }, { "epoch": 0.40935448682372305, "grad_norm": 1.625, "learning_rate": 9.221350641571848e-05, "loss": 7.0449, "step": 7130 }, { "epoch": 0.40992861653876334, "grad_norm": 1.6640625, "learning_rate": 9.21885734745056e-05, "loss": 7.0498, "step": 7140 }, { "epoch": 0.4105027462538036, "grad_norm": 1.6171875, "learning_rate": 9.216360406045254e-05, "loss": 7.0622, "step": 7150 }, { "epoch": 0.4110768759688439, "grad_norm": 1.6328125, "learning_rate": 9.213859819514581e-05, "loss": 7.0269, "step": 7160 }, { "epoch": 0.4116510056838842, "grad_norm": 1.6953125, "learning_rate": 9.211355590020348e-05, "loss": 7.061, "step": 7170 }, { "epoch": 0.41222513539892447, "grad_norm": 1.6171875, "learning_rate": 9.208847719727509e-05, "loss": 7.0471, "step": 7180 }, { "epoch": 0.41279926511396475, "grad_norm": 1.578125, "learning_rate": 9.206336210804167e-05, "loss": 7.0315, "step": 7190 }, { "epoch": 0.41337339482900504, "grad_norm": 1.6796875, "learning_rate": 9.203821065421571e-05, "loss": 7.0379, "step": 7200 }, { "epoch": 0.4139475245440453, "grad_norm": 1.6328125, "learning_rate": 9.201302285754114e-05, "loss": 7.0435, "step": 7210 }, { "epoch": 0.4145216542590856, "grad_norm": 1.703125, "learning_rate": 9.198779873979326e-05, "loss": 7.016, "step": 7220 }, { "epoch": 0.4150957839741259, "grad_norm": 1.5703125, "learning_rate": 9.196253832277883e-05, "loss": 7.0658, "step": 7230 }, { "epoch": 0.41566991368916617, "grad_norm": 1.5234375, "learning_rate": 9.193724162833598e-05, "loss": 7.0362, "step": 7240 }, { "epoch": 0.41624404340420645, "grad_norm": 1.6171875, "learning_rate": 9.191190867833419e-05, "loss": 7.0765, "step": 7250 }, { "epoch": 0.41681817311924674, "grad_norm": 1.546875, "learning_rate": 9.188653949467427e-05, "loss": 7.0459, "step": 7260 }, { "epoch": 0.417392302834287, "grad_norm": 1.5390625, "learning_rate": 9.186113409928838e-05, "loss": 7.0007, "step": 7270 }, { "epoch": 0.4179664325493273, "grad_norm": 1.7109375, "learning_rate": 9.183569251413999e-05, "loss": 7.0554, "step": 7280 }, { "epoch": 0.4185405622643676, "grad_norm": 1.609375, "learning_rate": 9.181021476122385e-05, "loss": 7.0235, "step": 7290 }, { "epoch": 0.41911469197940787, "grad_norm": 1.65625, "learning_rate": 9.178470086256593e-05, "loss": 7.0363, "step": 7300 }, { "epoch": 0.41968882169444816, "grad_norm": 1.7265625, "learning_rate": 9.175915084022353e-05, "loss": 7.0607, "step": 7310 }, { "epoch": 0.42026295140948844, "grad_norm": 1.703125, "learning_rate": 9.173356471628511e-05, "loss": 7.0638, "step": 7320 }, { "epoch": 0.4208370811245287, "grad_norm": 1.6171875, "learning_rate": 9.17079425128704e-05, "loss": 7.0519, "step": 7330 }, { "epoch": 0.421411210839569, "grad_norm": 1.625, "learning_rate": 9.168228425213028e-05, "loss": 7.0528, "step": 7340 }, { "epoch": 0.4219853405546093, "grad_norm": 1.5859375, "learning_rate": 9.165658995624681e-05, "loss": 6.9998, "step": 7350 }, { "epoch": 0.4225594702696496, "grad_norm": 1.6484375, "learning_rate": 9.163085964743321e-05, "loss": 7.0553, "step": 7360 }, { "epoch": 0.42313359998468986, "grad_norm": 1.5, "learning_rate": 9.160509334793384e-05, "loss": 7.087, "step": 7370 }, { "epoch": 0.42370772969973014, "grad_norm": 1.765625, "learning_rate": 9.157929108002414e-05, "loss": 7.0492, "step": 7380 }, { "epoch": 0.4242818594147704, "grad_norm": 1.6484375, "learning_rate": 9.155345286601069e-05, "loss": 7.0492, "step": 7390 }, { "epoch": 0.4248559891298107, "grad_norm": 1.6171875, "learning_rate": 9.152757872823113e-05, "loss": 7.0173, "step": 7400 }, { "epoch": 0.425430118844851, "grad_norm": 1.7109375, "learning_rate": 9.150166868905414e-05, "loss": 7.0533, "step": 7410 }, { "epoch": 0.4260042485598913, "grad_norm": 1.7265625, "learning_rate": 9.147572277087948e-05, "loss": 7.03, "step": 7420 }, { "epoch": 0.42657837827493156, "grad_norm": 1.578125, "learning_rate": 9.144974099613787e-05, "loss": 7.0407, "step": 7430 }, { "epoch": 0.42715250798997184, "grad_norm": 1.5625, "learning_rate": 9.142372338729108e-05, "loss": 7.0591, "step": 7440 }, { "epoch": 0.4277266377050121, "grad_norm": 1.6015625, "learning_rate": 9.139766996683181e-05, "loss": 7.0383, "step": 7450 }, { "epoch": 0.42830076742005246, "grad_norm": 1.5390625, "learning_rate": 9.137158075728377e-05, "loss": 7.0378, "step": 7460 }, { "epoch": 0.42887489713509275, "grad_norm": 1.6015625, "learning_rate": 9.134545578120157e-05, "loss": 7.0556, "step": 7470 }, { "epoch": 0.42944902685013303, "grad_norm": 1.5859375, "learning_rate": 9.131929506117078e-05, "loss": 7.0647, "step": 7480 }, { "epoch": 0.4300231565651733, "grad_norm": 1.6484375, "learning_rate": 9.129309861980783e-05, "loss": 7.0471, "step": 7490 }, { "epoch": 0.4305972862802136, "grad_norm": 1.625, "learning_rate": 9.126686647976008e-05, "loss": 7.0375, "step": 7500 }, { "epoch": 0.4311714159952539, "grad_norm": 1.5234375, "learning_rate": 9.124059866370571e-05, "loss": 7.0423, "step": 7510 }, { "epoch": 0.43174554571029417, "grad_norm": 1.578125, "learning_rate": 9.121429519435374e-05, "loss": 7.0172, "step": 7520 }, { "epoch": 0.43231967542533445, "grad_norm": 1.5390625, "learning_rate": 9.118795609444403e-05, "loss": 7.0096, "step": 7530 }, { "epoch": 0.43289380514037473, "grad_norm": 1.6171875, "learning_rate": 9.116158138674729e-05, "loss": 7.0544, "step": 7540 }, { "epoch": 0.433467934855415, "grad_norm": 1.625, "learning_rate": 9.11351710940649e-05, "loss": 7.0353, "step": 7550 }, { "epoch": 0.4340420645704553, "grad_norm": 1.6328125, "learning_rate": 9.110872523922911e-05, "loss": 7.0482, "step": 7560 }, { "epoch": 0.4346161942854956, "grad_norm": 1.515625, "learning_rate": 9.108224384510286e-05, "loss": 7.0514, "step": 7570 }, { "epoch": 0.43519032400053587, "grad_norm": 1.7109375, "learning_rate": 9.105572693457985e-05, "loss": 7.0284, "step": 7580 }, { "epoch": 0.43576445371557615, "grad_norm": 1.8046875, "learning_rate": 9.102917453058444e-05, "loss": 7.0148, "step": 7590 }, { "epoch": 0.43633858343061643, "grad_norm": 1.609375, "learning_rate": 9.100258665607171e-05, "loss": 7.0613, "step": 7600 }, { "epoch": 0.4369127131456567, "grad_norm": 1.6796875, "learning_rate": 9.097596333402738e-05, "loss": 7.0254, "step": 7610 }, { "epoch": 0.437486842860697, "grad_norm": 1.625, "learning_rate": 9.094930458746784e-05, "loss": 7.0785, "step": 7620 }, { "epoch": 0.4380609725757373, "grad_norm": 1.4765625, "learning_rate": 9.09226104394401e-05, "loss": 7.0231, "step": 7630 }, { "epoch": 0.43863510229077757, "grad_norm": 1.578125, "learning_rate": 9.089588091302176e-05, "loss": 7.0644, "step": 7640 }, { "epoch": 0.43920923200581785, "grad_norm": 1.6015625, "learning_rate": 9.086911603132103e-05, "loss": 7.0538, "step": 7650 }, { "epoch": 0.43978336172085813, "grad_norm": 1.515625, "learning_rate": 9.084231581747662e-05, "loss": 7.0289, "step": 7660 }, { "epoch": 0.4403574914358984, "grad_norm": 1.609375, "learning_rate": 9.081548029465789e-05, "loss": 7.0333, "step": 7670 }, { "epoch": 0.4409316211509387, "grad_norm": 1.609375, "learning_rate": 9.078860948606464e-05, "loss": 7.0796, "step": 7680 }, { "epoch": 0.441505750865979, "grad_norm": 1.671875, "learning_rate": 9.076170341492722e-05, "loss": 7.0289, "step": 7690 }, { "epoch": 0.44207988058101927, "grad_norm": 1.671875, "learning_rate": 9.073476210450646e-05, "loss": 7.0234, "step": 7700 }, { "epoch": 0.44265401029605955, "grad_norm": 1.609375, "learning_rate": 9.070778557809362e-05, "loss": 7.0468, "step": 7710 }, { "epoch": 0.44322814001109984, "grad_norm": 1.59375, "learning_rate": 9.068077385901043e-05, "loss": 7.048, "step": 7720 }, { "epoch": 0.4438022697261401, "grad_norm": 1.578125, "learning_rate": 9.065372697060908e-05, "loss": 7.0085, "step": 7730 }, { "epoch": 0.4443763994411804, "grad_norm": 1.53125, "learning_rate": 9.062664493627208e-05, "loss": 7.0594, "step": 7740 }, { "epoch": 0.4449505291562207, "grad_norm": 1.609375, "learning_rate": 9.059952777941241e-05, "loss": 7.0759, "step": 7750 }, { "epoch": 0.44552465887126097, "grad_norm": 1.6875, "learning_rate": 9.057237552347337e-05, "loss": 7.0201, "step": 7760 }, { "epoch": 0.44609878858630125, "grad_norm": 1.75, "learning_rate": 9.054518819192862e-05, "loss": 7.029, "step": 7770 }, { "epoch": 0.44667291830134154, "grad_norm": 1.5703125, "learning_rate": 9.051796580828212e-05, "loss": 7.0293, "step": 7780 }, { "epoch": 0.4472470480163818, "grad_norm": 1.6328125, "learning_rate": 9.049070839606813e-05, "loss": 6.9941, "step": 7790 }, { "epoch": 0.4478211777314221, "grad_norm": 1.609375, "learning_rate": 9.046341597885126e-05, "loss": 7.0569, "step": 7800 }, { "epoch": 0.4483953074464624, "grad_norm": 1.625, "learning_rate": 9.043608858022631e-05, "loss": 7.0543, "step": 7810 }, { "epoch": 0.44896943716150267, "grad_norm": 1.765625, "learning_rate": 9.040872622381834e-05, "loss": 7.0131, "step": 7820 }, { "epoch": 0.44954356687654295, "grad_norm": 1.578125, "learning_rate": 9.038132893328264e-05, "loss": 7.0364, "step": 7830 }, { "epoch": 0.45011769659158324, "grad_norm": 1.578125, "learning_rate": 9.035389673230472e-05, "loss": 7.0294, "step": 7840 }, { "epoch": 0.4506918263066235, "grad_norm": 1.75, "learning_rate": 9.032642964460023e-05, "loss": 7.046, "step": 7850 }, { "epoch": 0.4512659560216638, "grad_norm": 1.6484375, "learning_rate": 9.0298927693915e-05, "loss": 7.0224, "step": 7860 }, { "epoch": 0.4518400857367041, "grad_norm": 1.765625, "learning_rate": 9.027139090402499e-05, "loss": 7.0183, "step": 7870 }, { "epoch": 0.4524142154517444, "grad_norm": 1.7265625, "learning_rate": 9.024381929873631e-05, "loss": 7.037, "step": 7880 }, { "epoch": 0.45298834516678466, "grad_norm": 1.578125, "learning_rate": 9.021621290188516e-05, "loss": 7.0357, "step": 7890 }, { "epoch": 0.45356247488182494, "grad_norm": 1.6484375, "learning_rate": 9.018857173733776e-05, "loss": 7.0316, "step": 7900 }, { "epoch": 0.4541366045968653, "grad_norm": 1.71875, "learning_rate": 9.016089582899047e-05, "loss": 7.0234, "step": 7910 }, { "epoch": 0.45471073431190556, "grad_norm": 1.5546875, "learning_rate": 9.013318520076964e-05, "loss": 7.0453, "step": 7920 }, { "epoch": 0.45528486402694585, "grad_norm": 1.609375, "learning_rate": 9.010543987663165e-05, "loss": 7.0319, "step": 7930 }, { "epoch": 0.45585899374198613, "grad_norm": 1.609375, "learning_rate": 9.007765988056284e-05, "loss": 7.0268, "step": 7940 }, { "epoch": 0.4564331234570264, "grad_norm": 1.6171875, "learning_rate": 9.00498452365796e-05, "loss": 6.972, "step": 7950 }, { "epoch": 0.4570072531720667, "grad_norm": 1.6640625, "learning_rate": 9.002199596872821e-05, "loss": 7.062, "step": 7960 }, { "epoch": 0.457581382887107, "grad_norm": 1.59375, "learning_rate": 8.99941121010849e-05, "loss": 7.0066, "step": 7970 }, { "epoch": 0.45815551260214726, "grad_norm": 1.59375, "learning_rate": 8.996619365775583e-05, "loss": 7.0132, "step": 7980 }, { "epoch": 0.45872964231718755, "grad_norm": 1.5390625, "learning_rate": 8.993824066287699e-05, "loss": 7.0405, "step": 7990 }, { "epoch": 0.45930377203222783, "grad_norm": 1.6015625, "learning_rate": 8.991025314061434e-05, "loss": 7.0209, "step": 8000 }, { "epoch": 0.4598779017472681, "grad_norm": 1.5859375, "learning_rate": 8.988223111516363e-05, "loss": 7.0209, "step": 8010 }, { "epoch": 0.4604520314623084, "grad_norm": 1.7578125, "learning_rate": 8.98541746107504e-05, "loss": 7.0216, "step": 8020 }, { "epoch": 0.4610261611773487, "grad_norm": 1.6328125, "learning_rate": 8.982608365163009e-05, "loss": 7.0717, "step": 8030 }, { "epoch": 0.46160029089238896, "grad_norm": 1.6875, "learning_rate": 8.979795826208785e-05, "loss": 7.0264, "step": 8040 }, { "epoch": 0.46217442060742925, "grad_norm": 1.5859375, "learning_rate": 8.976979846643865e-05, "loss": 7.0146, "step": 8050 }, { "epoch": 0.46274855032246953, "grad_norm": 1.6484375, "learning_rate": 8.974160428902716e-05, "loss": 7.012, "step": 8060 }, { "epoch": 0.4633226800375098, "grad_norm": 1.734375, "learning_rate": 8.97133757542278e-05, "loss": 7.0154, "step": 8070 }, { "epoch": 0.4638968097525501, "grad_norm": 1.59375, "learning_rate": 8.968511288644468e-05, "loss": 6.9963, "step": 8080 }, { "epoch": 0.4644709394675904, "grad_norm": 1.5859375, "learning_rate": 8.96568157101116e-05, "loss": 7.0217, "step": 8090 }, { "epoch": 0.46504506918263067, "grad_norm": 1.59375, "learning_rate": 8.962848424969201e-05, "loss": 7.0337, "step": 8100 }, { "epoch": 0.46561919889767095, "grad_norm": 1.6640625, "learning_rate": 8.960011852967904e-05, "loss": 7.0316, "step": 8110 }, { "epoch": 0.46619332861271123, "grad_norm": 1.6328125, "learning_rate": 8.957171857459538e-05, "loss": 7.0226, "step": 8120 }, { "epoch": 0.4667674583277515, "grad_norm": 1.6015625, "learning_rate": 8.954328440899334e-05, "loss": 7.0425, "step": 8130 }, { "epoch": 0.4673415880427918, "grad_norm": 1.6640625, "learning_rate": 8.95148160574548e-05, "loss": 7.0096, "step": 8140 }, { "epoch": 0.4679157177578321, "grad_norm": 1.5703125, "learning_rate": 8.948631354459123e-05, "loss": 7.0395, "step": 8150 }, { "epoch": 0.46848984747287237, "grad_norm": 1.6953125, "learning_rate": 8.945777689504357e-05, "loss": 7.0209, "step": 8160 }, { "epoch": 0.46906397718791265, "grad_norm": 1.6875, "learning_rate": 8.942920613348235e-05, "loss": 7.0099, "step": 8170 }, { "epoch": 0.46963810690295293, "grad_norm": 1.6328125, "learning_rate": 8.940060128460752e-05, "loss": 7.0199, "step": 8180 }, { "epoch": 0.4702122366179932, "grad_norm": 1.7109375, "learning_rate": 8.937196237314853e-05, "loss": 7.0487, "step": 8190 }, { "epoch": 0.4707863663330335, "grad_norm": 1.6015625, "learning_rate": 8.934328942386427e-05, "loss": 7.0168, "step": 8200 }, { "epoch": 0.4713604960480738, "grad_norm": 1.7578125, "learning_rate": 8.931458246154307e-05, "loss": 7.0126, "step": 8210 }, { "epoch": 0.47193462576311407, "grad_norm": 1.5234375, "learning_rate": 8.928584151100265e-05, "loss": 7.0564, "step": 8220 }, { "epoch": 0.47250875547815435, "grad_norm": 1.6640625, "learning_rate": 8.925706659709014e-05, "loss": 7.0058, "step": 8230 }, { "epoch": 0.47308288519319464, "grad_norm": 1.578125, "learning_rate": 8.922825774468198e-05, "loss": 7.0144, "step": 8240 }, { "epoch": 0.4736570149082349, "grad_norm": 1.5859375, "learning_rate": 8.919941497868398e-05, "loss": 7.0205, "step": 8250 }, { "epoch": 0.4742311446232752, "grad_norm": 1.6796875, "learning_rate": 8.917053832403131e-05, "loss": 7.0391, "step": 8260 }, { "epoch": 0.4748052743383155, "grad_norm": 1.6484375, "learning_rate": 8.914162780568836e-05, "loss": 7.004, "step": 8270 }, { "epoch": 0.47537940405335577, "grad_norm": 1.59375, "learning_rate": 8.911268344864885e-05, "loss": 6.9943, "step": 8280 }, { "epoch": 0.47595353376839605, "grad_norm": 1.625, "learning_rate": 8.908370527793573e-05, "loss": 7.0023, "step": 8290 }, { "epoch": 0.47652766348343634, "grad_norm": 1.6171875, "learning_rate": 8.905469331860121e-05, "loss": 6.9991, "step": 8300 }, { "epoch": 0.4771017931984766, "grad_norm": 1.6328125, "learning_rate": 8.902564759572667e-05, "loss": 7.0314, "step": 8310 }, { "epoch": 0.4776759229135169, "grad_norm": 1.7421875, "learning_rate": 8.899656813442273e-05, "loss": 7.0244, "step": 8320 }, { "epoch": 0.4782500526285572, "grad_norm": 1.609375, "learning_rate": 8.89674549598291e-05, "loss": 6.9878, "step": 8330 }, { "epoch": 0.47882418234359747, "grad_norm": 1.6171875, "learning_rate": 8.893830809711472e-05, "loss": 7.0248, "step": 8340 }, { "epoch": 0.47939831205863775, "grad_norm": 1.5703125, "learning_rate": 8.89091275714776e-05, "loss": 7.0032, "step": 8350 }, { "epoch": 0.4799724417736781, "grad_norm": 1.6875, "learning_rate": 8.88799134081449e-05, "loss": 7.0167, "step": 8360 }, { "epoch": 0.4805465714887184, "grad_norm": 1.6328125, "learning_rate": 8.88506656323728e-05, "loss": 7.0079, "step": 8370 }, { "epoch": 0.48112070120375866, "grad_norm": 1.6015625, "learning_rate": 8.88213842694466e-05, "loss": 7.0017, "step": 8380 }, { "epoch": 0.48169483091879894, "grad_norm": 1.6328125, "learning_rate": 8.879206934468056e-05, "loss": 7.0265, "step": 8390 }, { "epoch": 0.4822689606338392, "grad_norm": 1.6015625, "learning_rate": 8.876272088341804e-05, "loss": 6.9952, "step": 8400 }, { "epoch": 0.4828430903488795, "grad_norm": 1.578125, "learning_rate": 8.873333891103135e-05, "loss": 6.9986, "step": 8410 }, { "epoch": 0.4834172200639198, "grad_norm": 1.6171875, "learning_rate": 8.870392345292175e-05, "loss": 6.9903, "step": 8420 }, { "epoch": 0.4839913497789601, "grad_norm": 1.625, "learning_rate": 8.867447453451952e-05, "loss": 7.0363, "step": 8430 }, { "epoch": 0.48456547949400036, "grad_norm": 1.6796875, "learning_rate": 8.864499218128377e-05, "loss": 7.0042, "step": 8440 }, { "epoch": 0.48513960920904065, "grad_norm": 1.6171875, "learning_rate": 8.86154764187026e-05, "loss": 7.0164, "step": 8450 }, { "epoch": 0.48571373892408093, "grad_norm": 1.5546875, "learning_rate": 8.858592727229295e-05, "loss": 7.0584, "step": 8460 }, { "epoch": 0.4862878686391212, "grad_norm": 1.5625, "learning_rate": 8.855634476760061e-05, "loss": 7.0018, "step": 8470 }, { "epoch": 0.4868619983541615, "grad_norm": 1.5859375, "learning_rate": 8.852672893020027e-05, "loss": 6.9839, "step": 8480 }, { "epoch": 0.4874361280692018, "grad_norm": 1.640625, "learning_rate": 8.849707978569537e-05, "loss": 7.004, "step": 8490 }, { "epoch": 0.48801025778424206, "grad_norm": 1.515625, "learning_rate": 8.846739735971817e-05, "loss": 7.0146, "step": 8500 }, { "epoch": 0.48858438749928235, "grad_norm": 1.59375, "learning_rate": 8.843768167792971e-05, "loss": 7.0398, "step": 8510 }, { "epoch": 0.48915851721432263, "grad_norm": 1.625, "learning_rate": 8.840793276601977e-05, "loss": 7.0074, "step": 8520 }, { "epoch": 0.4897326469293629, "grad_norm": 1.578125, "learning_rate": 8.837815064970687e-05, "loss": 6.9928, "step": 8530 }, { "epoch": 0.4903067766444032, "grad_norm": 1.6171875, "learning_rate": 8.834833535473822e-05, "loss": 7.0171, "step": 8540 }, { "epoch": 0.4908809063594435, "grad_norm": 1.5625, "learning_rate": 8.831848690688972e-05, "loss": 7.0176, "step": 8550 }, { "epoch": 0.49145503607448376, "grad_norm": 1.65625, "learning_rate": 8.828860533196593e-05, "loss": 7.0408, "step": 8560 }, { "epoch": 0.49202916578952405, "grad_norm": 1.625, "learning_rate": 8.825869065580006e-05, "loss": 7.0392, "step": 8570 }, { "epoch": 0.49260329550456433, "grad_norm": 1.7421875, "learning_rate": 8.822874290425391e-05, "loss": 7.0104, "step": 8580 }, { "epoch": 0.4931774252196046, "grad_norm": 1.640625, "learning_rate": 8.819876210321792e-05, "loss": 7.001, "step": 8590 }, { "epoch": 0.4937515549346449, "grad_norm": 1.640625, "learning_rate": 8.816874827861103e-05, "loss": 7.0417, "step": 8600 }, { "epoch": 0.4943256846496852, "grad_norm": 1.6171875, "learning_rate": 8.813870145638083e-05, "loss": 7.0276, "step": 8610 }, { "epoch": 0.49489981436472547, "grad_norm": 1.6171875, "learning_rate": 8.810862166250335e-05, "loss": 7.0135, "step": 8620 }, { "epoch": 0.49547394407976575, "grad_norm": 1.5, "learning_rate": 8.807850892298315e-05, "loss": 7.0043, "step": 8630 }, { "epoch": 0.49604807379480603, "grad_norm": 1.6796875, "learning_rate": 8.804836326385328e-05, "loss": 7.0027, "step": 8640 }, { "epoch": 0.4966222035098463, "grad_norm": 1.609375, "learning_rate": 8.801818471117528e-05, "loss": 7.0387, "step": 8650 }, { "epoch": 0.4971963332248866, "grad_norm": 1.6484375, "learning_rate": 8.798797329103905e-05, "loss": 7.0251, "step": 8660 }, { "epoch": 0.4977704629399269, "grad_norm": 1.59375, "learning_rate": 8.795772902956297e-05, "loss": 7.0148, "step": 8670 }, { "epoch": 0.49834459265496717, "grad_norm": 1.6484375, "learning_rate": 8.792745195289378e-05, "loss": 7.0388, "step": 8680 }, { "epoch": 0.49891872237000745, "grad_norm": 1.5625, "learning_rate": 8.789714208720661e-05, "loss": 7.0125, "step": 8690 }, { "epoch": 0.49949285208504773, "grad_norm": 1.625, "learning_rate": 8.786679945870491e-05, "loss": 7.0172, "step": 8700 }, { "epoch": 0.500066981800088, "grad_norm": 1.5546875, "learning_rate": 8.78364240936205e-05, "loss": 7.0287, "step": 8710 }, { "epoch": 0.5006411115151284, "grad_norm": 1.6171875, "learning_rate": 8.780601601821345e-05, "loss": 7.0163, "step": 8720 }, { "epoch": 0.5012152412301686, "grad_norm": 1.6796875, "learning_rate": 8.777557525877216e-05, "loss": 7.0036, "step": 8730 }, { "epoch": 0.5017893709452089, "grad_norm": 1.609375, "learning_rate": 8.774510184161322e-05, "loss": 6.9887, "step": 8740 }, { "epoch": 0.5023635006602492, "grad_norm": 1.578125, "learning_rate": 8.77145957930815e-05, "loss": 7.0021, "step": 8750 }, { "epoch": 0.5029376303752895, "grad_norm": 1.6015625, "learning_rate": 8.768405713955009e-05, "loss": 7.0171, "step": 8760 }, { "epoch": 0.5035117600903297, "grad_norm": 1.59375, "learning_rate": 8.765348590742021e-05, "loss": 7.0025, "step": 8770 }, { "epoch": 0.5040858898053701, "grad_norm": 1.6953125, "learning_rate": 8.762288212312133e-05, "loss": 7.0257, "step": 8780 }, { "epoch": 0.5046600195204103, "grad_norm": 1.5234375, "learning_rate": 8.759224581311098e-05, "loss": 7.0121, "step": 8790 }, { "epoch": 0.5052341492354506, "grad_norm": 1.59375, "learning_rate": 8.756157700387487e-05, "loss": 7.0393, "step": 8800 }, { "epoch": 0.5058082789504909, "grad_norm": 1.625, "learning_rate": 8.753087572192675e-05, "loss": 7.0138, "step": 8810 }, { "epoch": 0.5063824086655312, "grad_norm": 1.5625, "learning_rate": 8.750014199380852e-05, "loss": 7.0661, "step": 8820 }, { "epoch": 0.5069565383805714, "grad_norm": 1.6171875, "learning_rate": 8.746937584609003e-05, "loss": 7.0297, "step": 8830 }, { "epoch": 0.5075306680956118, "grad_norm": 1.640625, "learning_rate": 8.743857730536925e-05, "loss": 7.0149, "step": 8840 }, { "epoch": 0.508104797810652, "grad_norm": 1.53125, "learning_rate": 8.74077463982721e-05, "loss": 7.0319, "step": 8850 }, { "epoch": 0.5086789275256923, "grad_norm": 1.5234375, "learning_rate": 8.737688315145251e-05, "loss": 7.0174, "step": 8860 }, { "epoch": 0.5092530572407326, "grad_norm": 1.609375, "learning_rate": 8.734598759159234e-05, "loss": 7.0097, "step": 8870 }, { "epoch": 0.5098271869557729, "grad_norm": 1.671875, "learning_rate": 8.731505974540139e-05, "loss": 6.9883, "step": 8880 }, { "epoch": 0.5104013166708131, "grad_norm": 1.65625, "learning_rate": 8.728409963961744e-05, "loss": 7.0213, "step": 8890 }, { "epoch": 0.5109754463858535, "grad_norm": 1.6015625, "learning_rate": 8.725310730100602e-05, "loss": 7.0088, "step": 8900 }, { "epoch": 0.5115495761008937, "grad_norm": 1.59375, "learning_rate": 8.722208275636068e-05, "loss": 6.9966, "step": 8910 }, { "epoch": 0.512123705815934, "grad_norm": 1.546875, "learning_rate": 8.71910260325027e-05, "loss": 7.0113, "step": 8920 }, { "epoch": 0.5126978355309743, "grad_norm": 1.5390625, "learning_rate": 8.715993715628122e-05, "loss": 7.0126, "step": 8930 }, { "epoch": 0.5132719652460146, "grad_norm": 1.578125, "learning_rate": 8.71288161545732e-05, "loss": 7.0036, "step": 8940 }, { "epoch": 0.5138460949610548, "grad_norm": 1.6484375, "learning_rate": 8.709766305428334e-05, "loss": 7.004, "step": 8950 }, { "epoch": 0.5144202246760952, "grad_norm": 1.5234375, "learning_rate": 8.70664778823441e-05, "loss": 7.0153, "step": 8960 }, { "epoch": 0.5149943543911354, "grad_norm": 1.6484375, "learning_rate": 8.703526066571565e-05, "loss": 7.0101, "step": 8970 }, { "epoch": 0.5155684841061757, "grad_norm": 1.5546875, "learning_rate": 8.70040114313859e-05, "loss": 7.0252, "step": 8980 }, { "epoch": 0.516142613821216, "grad_norm": 1.609375, "learning_rate": 8.697273020637042e-05, "loss": 6.9998, "step": 8990 }, { "epoch": 0.5167167435362563, "grad_norm": 1.734375, "learning_rate": 8.694141701771241e-05, "loss": 7.0078, "step": 9000 }, { "epoch": 0.5172908732512965, "grad_norm": 1.5859375, "learning_rate": 8.691007189248276e-05, "loss": 7.0193, "step": 9010 }, { "epoch": 0.5178650029663369, "grad_norm": 1.640625, "learning_rate": 8.687869485777993e-05, "loss": 7.0169, "step": 9020 }, { "epoch": 0.5184391326813771, "grad_norm": 1.578125, "learning_rate": 8.684728594072995e-05, "loss": 6.9945, "step": 9030 }, { "epoch": 0.5190132623964174, "grad_norm": 1.6171875, "learning_rate": 8.681584516848648e-05, "loss": 7.002, "step": 9040 }, { "epoch": 0.5195873921114578, "grad_norm": 1.640625, "learning_rate": 8.678437256823065e-05, "loss": 7.0061, "step": 9050 }, { "epoch": 0.520161521826498, "grad_norm": 1.46875, "learning_rate": 8.675286816717114e-05, "loss": 7.0683, "step": 9060 }, { "epoch": 0.5207356515415383, "grad_norm": 1.6015625, "learning_rate": 8.67213319925441e-05, "loss": 6.9938, "step": 9070 }, { "epoch": 0.5213097812565786, "grad_norm": 1.5546875, "learning_rate": 8.66897640716132e-05, "loss": 7.0302, "step": 9080 }, { "epoch": 0.5218839109716189, "grad_norm": 1.4921875, "learning_rate": 8.66581644316695e-05, "loss": 6.9934, "step": 9090 }, { "epoch": 0.5224580406866591, "grad_norm": 1.65625, "learning_rate": 8.66265331000315e-05, "loss": 7.0016, "step": 9100 }, { "epoch": 0.5230321704016995, "grad_norm": 1.6015625, "learning_rate": 8.659487010404511e-05, "loss": 7.0131, "step": 9110 }, { "epoch": 0.5236063001167397, "grad_norm": 1.8203125, "learning_rate": 8.656317547108356e-05, "loss": 6.9904, "step": 9120 }, { "epoch": 0.52418042983178, "grad_norm": 1.640625, "learning_rate": 8.653144922854755e-05, "loss": 7.0, "step": 9130 }, { "epoch": 0.5247545595468203, "grad_norm": 1.609375, "learning_rate": 8.649969140386497e-05, "loss": 7.0017, "step": 9140 }, { "epoch": 0.5253286892618606, "grad_norm": 1.625, "learning_rate": 8.646790202449114e-05, "loss": 6.9942, "step": 9150 }, { "epoch": 0.5259028189769008, "grad_norm": 1.6796875, "learning_rate": 8.64360811179085e-05, "loss": 6.9673, "step": 9160 }, { "epoch": 0.5264769486919412, "grad_norm": 1.6640625, "learning_rate": 8.640422871162693e-05, "loss": 7.0162, "step": 9170 }, { "epoch": 0.5270510784069814, "grad_norm": 1.59375, "learning_rate": 8.637234483318342e-05, "loss": 7.0132, "step": 9180 }, { "epoch": 0.5276252081220217, "grad_norm": 1.53125, "learning_rate": 8.634042951014219e-05, "loss": 6.9909, "step": 9190 }, { "epoch": 0.528199337837062, "grad_norm": 1.640625, "learning_rate": 8.630848277009465e-05, "loss": 6.9867, "step": 9200 }, { "epoch": 0.5287734675521023, "grad_norm": 1.6015625, "learning_rate": 8.627650464065942e-05, "loss": 7.0013, "step": 9210 }, { "epoch": 0.5293475972671425, "grad_norm": 1.7265625, "learning_rate": 8.624449514948216e-05, "loss": 7.0366, "step": 9220 }, { "epoch": 0.5299217269821829, "grad_norm": 1.6875, "learning_rate": 8.621245432423575e-05, "loss": 7.042, "step": 9230 }, { "epoch": 0.5304958566972231, "grad_norm": 1.5625, "learning_rate": 8.618038219262006e-05, "loss": 7.017, "step": 9240 }, { "epoch": 0.5310699864122634, "grad_norm": 1.71875, "learning_rate": 8.614827878236209e-05, "loss": 6.9998, "step": 9250 }, { "epoch": 0.5316441161273037, "grad_norm": 1.6171875, "learning_rate": 8.611614412121584e-05, "loss": 6.9938, "step": 9260 }, { "epoch": 0.532218245842344, "grad_norm": 1.65625, "learning_rate": 8.608397823696239e-05, "loss": 7.0051, "step": 9270 }, { "epoch": 0.5327923755573842, "grad_norm": 1.6796875, "learning_rate": 8.605178115740975e-05, "loss": 6.9972, "step": 9280 }, { "epoch": 0.5333665052724246, "grad_norm": 1.6171875, "learning_rate": 8.60195529103929e-05, "loss": 7.0035, "step": 9290 }, { "epoch": 0.5339406349874648, "grad_norm": 1.5390625, "learning_rate": 8.598729352377381e-05, "loss": 7.0136, "step": 9300 }, { "epoch": 0.5345147647025051, "grad_norm": 1.5390625, "learning_rate": 8.595500302544133e-05, "loss": 7.0247, "step": 9310 }, { "epoch": 0.5350888944175454, "grad_norm": 1.59375, "learning_rate": 8.592268144331124e-05, "loss": 6.9363, "step": 9320 }, { "epoch": 0.5356630241325857, "grad_norm": 1.671875, "learning_rate": 8.589032880532615e-05, "loss": 6.9205, "step": 9330 }, { "epoch": 0.5362371538476259, "grad_norm": 1.640625, "learning_rate": 8.585794513945557e-05, "loss": 6.9942, "step": 9340 }, { "epoch": 0.5368112835626663, "grad_norm": 1.5703125, "learning_rate": 8.582553047369579e-05, "loss": 7.0191, "step": 9350 }, { "epoch": 0.5373854132777065, "grad_norm": 1.59375, "learning_rate": 8.579308483606991e-05, "loss": 6.9831, "step": 9360 }, { "epoch": 0.5379595429927468, "grad_norm": 1.5859375, "learning_rate": 8.576060825462784e-05, "loss": 6.9771, "step": 9370 }, { "epoch": 0.5385336727077871, "grad_norm": 1.671875, "learning_rate": 8.57281007574462e-05, "loss": 7.0119, "step": 9380 }, { "epoch": 0.5391078024228274, "grad_norm": 1.671875, "learning_rate": 8.569556237262834e-05, "loss": 6.9919, "step": 9390 }, { "epoch": 0.5396819321378676, "grad_norm": 1.6171875, "learning_rate": 8.566299312830433e-05, "loss": 7.0048, "step": 9400 }, { "epoch": 0.540256061852908, "grad_norm": 1.578125, "learning_rate": 8.563039305263095e-05, "loss": 7.0365, "step": 9410 }, { "epoch": 0.5408301915679482, "grad_norm": 1.546875, "learning_rate": 8.559776217379154e-05, "loss": 7.0129, "step": 9420 }, { "epoch": 0.5414043212829885, "grad_norm": 1.6796875, "learning_rate": 8.556510051999616e-05, "loss": 6.9662, "step": 9430 }, { "epoch": 0.5419784509980288, "grad_norm": 1.6328125, "learning_rate": 8.553240811948144e-05, "loss": 6.9831, "step": 9440 }, { "epoch": 0.5425525807130691, "grad_norm": 1.6015625, "learning_rate": 8.54996850005106e-05, "loss": 7.0207, "step": 9450 }, { "epoch": 0.5431267104281093, "grad_norm": 1.5703125, "learning_rate": 8.54669311913734e-05, "loss": 6.9951, "step": 9460 }, { "epoch": 0.5437008401431497, "grad_norm": 1.6640625, "learning_rate": 8.543414672038615e-05, "loss": 7.0068, "step": 9470 }, { "epoch": 0.5442749698581899, "grad_norm": 1.6015625, "learning_rate": 8.540133161589165e-05, "loss": 6.9715, "step": 9480 }, { "epoch": 0.5448490995732302, "grad_norm": 1.6484375, "learning_rate": 8.536848590625923e-05, "loss": 6.9811, "step": 9490 }, { "epoch": 0.5454232292882706, "grad_norm": 1.5859375, "learning_rate": 8.53356096198846e-05, "loss": 6.9818, "step": 9500 }, { "epoch": 0.5459973590033108, "grad_norm": 1.6328125, "learning_rate": 8.530270278518997e-05, "loss": 7.032, "step": 9510 }, { "epoch": 0.5465714887183512, "grad_norm": 1.640625, "learning_rate": 8.52697654306239e-05, "loss": 7.0155, "step": 9520 }, { "epoch": 0.5471456184333914, "grad_norm": 1.5390625, "learning_rate": 8.523679758466144e-05, "loss": 7.0373, "step": 9530 }, { "epoch": 0.5477197481484317, "grad_norm": 1.5703125, "learning_rate": 8.520379927580386e-05, "loss": 7.0093, "step": 9540 }, { "epoch": 0.548293877863472, "grad_norm": 1.5625, "learning_rate": 8.51707705325789e-05, "loss": 7.0006, "step": 9550 }, { "epoch": 0.5488680075785123, "grad_norm": 1.5859375, "learning_rate": 8.513771138354052e-05, "loss": 6.9994, "step": 9560 }, { "epoch": 0.5494421372935525, "grad_norm": 1.59375, "learning_rate": 8.5104621857269e-05, "loss": 7.004, "step": 9570 }, { "epoch": 0.5500162670085929, "grad_norm": 1.6328125, "learning_rate": 8.507150198237087e-05, "loss": 7.0039, "step": 9580 }, { "epoch": 0.5505903967236331, "grad_norm": 1.609375, "learning_rate": 8.503835178747892e-05, "loss": 6.9992, "step": 9590 }, { "epoch": 0.5511645264386734, "grad_norm": 1.6171875, "learning_rate": 8.500517130125212e-05, "loss": 7.0309, "step": 9600 }, { "epoch": 0.5517386561537136, "grad_norm": 1.59375, "learning_rate": 8.497196055237565e-05, "loss": 6.9839, "step": 9610 }, { "epoch": 0.552312785868754, "grad_norm": 1.609375, "learning_rate": 8.493871956956083e-05, "loss": 6.9932, "step": 9620 }, { "epoch": 0.5528869155837942, "grad_norm": 1.53125, "learning_rate": 8.490544838154518e-05, "loss": 7.0001, "step": 9630 }, { "epoch": 0.5534610452988346, "grad_norm": 1.5859375, "learning_rate": 8.487214701709225e-05, "loss": 7.0033, "step": 9640 }, { "epoch": 0.5540351750138748, "grad_norm": 1.7109375, "learning_rate": 8.48388155049917e-05, "loss": 7.0179, "step": 9650 }, { "epoch": 0.5546093047289151, "grad_norm": 1.6171875, "learning_rate": 8.480545387405933e-05, "loss": 7.0158, "step": 9660 }, { "epoch": 0.5551834344439553, "grad_norm": 1.6953125, "learning_rate": 8.477206215313687e-05, "loss": 6.971, "step": 9670 }, { "epoch": 0.5557575641589957, "grad_norm": 1.625, "learning_rate": 8.473864037109212e-05, "loss": 6.9604, "step": 9680 }, { "epoch": 0.5563316938740359, "grad_norm": 1.65625, "learning_rate": 8.470518855681886e-05, "loss": 7.0133, "step": 9690 }, { "epoch": 0.5569058235890763, "grad_norm": 1.6484375, "learning_rate": 8.467170673923684e-05, "loss": 6.9575, "step": 9700 }, { "epoch": 0.5574799533041165, "grad_norm": 1.5625, "learning_rate": 8.463819494729173e-05, "loss": 6.9639, "step": 9710 }, { "epoch": 0.5580540830191568, "grad_norm": 1.578125, "learning_rate": 8.460465320995513e-05, "loss": 6.995, "step": 9720 }, { "epoch": 0.558628212734197, "grad_norm": 1.5625, "learning_rate": 8.45710815562245e-05, "loss": 7.0093, "step": 9730 }, { "epoch": 0.5592023424492374, "grad_norm": 1.6015625, "learning_rate": 8.453748001512322e-05, "loss": 6.9967, "step": 9740 }, { "epoch": 0.5597764721642776, "grad_norm": 1.625, "learning_rate": 8.450384861570047e-05, "loss": 7.0163, "step": 9750 }, { "epoch": 0.560350601879318, "grad_norm": 1.578125, "learning_rate": 8.447018738703122e-05, "loss": 6.9971, "step": 9760 }, { "epoch": 0.5609247315943582, "grad_norm": 1.6328125, "learning_rate": 8.443649635821629e-05, "loss": 6.9673, "step": 9770 }, { "epoch": 0.5614988613093985, "grad_norm": 1.5546875, "learning_rate": 8.44027755583822e-05, "loss": 6.9829, "step": 9780 }, { "epoch": 0.5620729910244388, "grad_norm": 1.5859375, "learning_rate": 8.436902501668124e-05, "loss": 6.9677, "step": 9790 }, { "epoch": 0.5626471207394791, "grad_norm": 1.578125, "learning_rate": 8.433524476229142e-05, "loss": 7.0134, "step": 9800 }, { "epoch": 0.5632212504545193, "grad_norm": 1.6484375, "learning_rate": 8.430143482441643e-05, "loss": 7.0174, "step": 9810 }, { "epoch": 0.5637953801695597, "grad_norm": 1.6953125, "learning_rate": 8.42675952322856e-05, "loss": 7.0116, "step": 9820 }, { "epoch": 0.5643695098845999, "grad_norm": 1.6796875, "learning_rate": 8.423372601515391e-05, "loss": 6.9594, "step": 9830 }, { "epoch": 0.5649436395996402, "grad_norm": 1.6484375, "learning_rate": 8.419982720230199e-05, "loss": 6.9933, "step": 9840 }, { "epoch": 0.5655177693146805, "grad_norm": 1.6875, "learning_rate": 8.416589882303598e-05, "loss": 6.9948, "step": 9850 }, { "epoch": 0.5660918990297208, "grad_norm": 1.671875, "learning_rate": 8.413194090668766e-05, "loss": 6.9934, "step": 9860 }, { "epoch": 0.566666028744761, "grad_norm": 1.6171875, "learning_rate": 8.409795348261427e-05, "loss": 6.9736, "step": 9870 }, { "epoch": 0.5672401584598014, "grad_norm": 1.53125, "learning_rate": 8.40639365801986e-05, "loss": 6.9948, "step": 9880 }, { "epoch": 0.5678142881748416, "grad_norm": 1.59375, "learning_rate": 8.402989022884896e-05, "loss": 6.9928, "step": 9890 }, { "epoch": 0.5683884178898819, "grad_norm": 1.625, "learning_rate": 8.399581445799905e-05, "loss": 6.9894, "step": 9900 }, { "epoch": 0.5689625476049222, "grad_norm": 1.6484375, "learning_rate": 8.396170929710805e-05, "loss": 6.9837, "step": 9910 }, { "epoch": 0.5695366773199625, "grad_norm": 944.0, "learning_rate": 8.392757477566051e-05, "loss": 7.0183, "step": 9920 }, { "epoch": 0.5701108070350027, "grad_norm": 1.71875, "learning_rate": 8.389341092316642e-05, "loss": 6.9961, "step": 9930 }, { "epoch": 0.5706849367500431, "grad_norm": 1.6328125, "learning_rate": 8.385921776916106e-05, "loss": 7.0278, "step": 9940 }, { "epoch": 0.5712590664650834, "grad_norm": 1.5859375, "learning_rate": 8.382499534320509e-05, "loss": 7.014, "step": 9950 }, { "epoch": 0.5718331961801236, "grad_norm": 1.5703125, "learning_rate": 8.379074367488446e-05, "loss": 6.9726, "step": 9960 }, { "epoch": 0.572407325895164, "grad_norm": 1.5703125, "learning_rate": 8.375646279381042e-05, "loss": 7.0086, "step": 9970 }, { "epoch": 0.5729814556102042, "grad_norm": 1.5703125, "learning_rate": 8.372215272961943e-05, "loss": 6.9934, "step": 9980 }, { "epoch": 0.5735555853252445, "grad_norm": 1.6875, "learning_rate": 8.368781351197321e-05, "loss": 7.0058, "step": 9990 }, { "epoch": 0.5741297150402848, "grad_norm": 1.6796875, "learning_rate": 8.36534451705587e-05, "loss": 6.9995, "step": 10000 }, { "epoch": 0.5747038447553251, "grad_norm": 1.5390625, "learning_rate": 8.361904773508798e-05, "loss": 7.0011, "step": 10010 }, { "epoch": 0.5752779744703653, "grad_norm": 1.546875, "learning_rate": 8.358462123529829e-05, "loss": 6.9823, "step": 10020 }, { "epoch": 0.5758521041854057, "grad_norm": 1.6484375, "learning_rate": 8.355016570095204e-05, "loss": 7.0104, "step": 10030 }, { "epoch": 0.5764262339004459, "grad_norm": 1.5234375, "learning_rate": 8.351568116183667e-05, "loss": 7.0075, "step": 10040 }, { "epoch": 0.5770003636154862, "grad_norm": 1.59375, "learning_rate": 8.348116764776475e-05, "loss": 6.9775, "step": 10050 }, { "epoch": 0.5775744933305265, "grad_norm": 1.578125, "learning_rate": 8.344662518857388e-05, "loss": 6.9956, "step": 10060 }, { "epoch": 0.5781486230455668, "grad_norm": 1.59375, "learning_rate": 8.34120538141267e-05, "loss": 7.0129, "step": 10070 }, { "epoch": 0.578722752760607, "grad_norm": 1.7265625, "learning_rate": 8.337745355431083e-05, "loss": 6.985, "step": 10080 }, { "epoch": 0.5792968824756474, "grad_norm": 1.65625, "learning_rate": 8.334282443903886e-05, "loss": 7.025, "step": 10090 }, { "epoch": 0.5798710121906876, "grad_norm": 1.5859375, "learning_rate": 8.330816649824833e-05, "loss": 6.9988, "step": 10100 }, { "epoch": 0.5804451419057279, "grad_norm": 1.4921875, "learning_rate": 8.32734797619017e-05, "loss": 6.9752, "step": 10110 }, { "epoch": 0.5810192716207682, "grad_norm": 1.609375, "learning_rate": 8.323876425998633e-05, "loss": 6.9748, "step": 10120 }, { "epoch": 0.5815934013358085, "grad_norm": 1.6875, "learning_rate": 8.320402002251446e-05, "loss": 7.0053, "step": 10130 }, { "epoch": 0.5821675310508487, "grad_norm": 1.5859375, "learning_rate": 8.316924707952312e-05, "loss": 6.9957, "step": 10140 }, { "epoch": 0.5827416607658891, "grad_norm": 1.7109375, "learning_rate": 8.313444546107423e-05, "loss": 6.9575, "step": 10150 }, { "epoch": 0.5833157904809293, "grad_norm": 1.6171875, "learning_rate": 8.309961519725444e-05, "loss": 6.9751, "step": 10160 }, { "epoch": 0.5838899201959696, "grad_norm": 1.6484375, "learning_rate": 8.30647563181752e-05, "loss": 6.9962, "step": 10170 }, { "epoch": 0.5844640499110099, "grad_norm": 1.578125, "learning_rate": 8.30298688539727e-05, "loss": 6.9946, "step": 10180 }, { "epoch": 0.5850381796260502, "grad_norm": 1.5703125, "learning_rate": 8.29949528348078e-05, "loss": 6.9892, "step": 10190 }, { "epoch": 0.5856123093410904, "grad_norm": 1.59375, "learning_rate": 8.296000829086611e-05, "loss": 6.9865, "step": 10200 }, { "epoch": 0.5861864390561308, "grad_norm": 1.6796875, "learning_rate": 8.292503525235785e-05, "loss": 6.9942, "step": 10210 }, { "epoch": 0.586760568771171, "grad_norm": 1.7734375, "learning_rate": 8.289003374951786e-05, "loss": 6.988, "step": 10220 }, { "epoch": 0.5873346984862113, "grad_norm": 1.65625, "learning_rate": 8.285500381260567e-05, "loss": 6.9678, "step": 10230 }, { "epoch": 0.5879088282012516, "grad_norm": 1.5625, "learning_rate": 8.28199454719053e-05, "loss": 6.9993, "step": 10240 }, { "epoch": 0.5884829579162919, "grad_norm": 1.578125, "learning_rate": 8.27848587577254e-05, "loss": 6.9852, "step": 10250 }, { "epoch": 0.5890570876313321, "grad_norm": 1.5859375, "learning_rate": 8.274974370039909e-05, "loss": 6.9722, "step": 10260 }, { "epoch": 0.5896312173463725, "grad_norm": 1.546875, "learning_rate": 8.271460033028401e-05, "loss": 7.0244, "step": 10270 }, { "epoch": 0.5902053470614127, "grad_norm": 1.5703125, "learning_rate": 8.267942867776233e-05, "loss": 7.0165, "step": 10280 }, { "epoch": 0.590779476776453, "grad_norm": 1.65625, "learning_rate": 8.264422877324059e-05, "loss": 6.9987, "step": 10290 }, { "epoch": 0.5913536064914933, "grad_norm": 1.625, "learning_rate": 8.260900064714978e-05, "loss": 6.9803, "step": 10300 }, { "epoch": 0.5919277362065336, "grad_norm": 1.5546875, "learning_rate": 8.257374432994532e-05, "loss": 6.9853, "step": 10310 }, { "epoch": 0.5925018659215738, "grad_norm": 1.6015625, "learning_rate": 8.253845985210697e-05, "loss": 6.9921, "step": 10320 }, { "epoch": 0.5930759956366142, "grad_norm": 1.59375, "learning_rate": 8.250314724413888e-05, "loss": 7.0052, "step": 10330 }, { "epoch": 0.5936501253516544, "grad_norm": 1.5859375, "learning_rate": 8.246780653656942e-05, "loss": 6.9785, "step": 10340 }, { "epoch": 0.5942242550666947, "grad_norm": 1.609375, "learning_rate": 8.243243775995138e-05, "loss": 7.0001, "step": 10350 }, { "epoch": 0.594798384781735, "grad_norm": 1.640625, "learning_rate": 8.239704094486171e-05, "loss": 6.9794, "step": 10360 }, { "epoch": 0.5953725144967753, "grad_norm": 1.5234375, "learning_rate": 8.236161612190167e-05, "loss": 6.9948, "step": 10370 }, { "epoch": 0.5959466442118155, "grad_norm": 1.578125, "learning_rate": 8.232616332169669e-05, "loss": 6.9464, "step": 10380 }, { "epoch": 0.5965207739268559, "grad_norm": 1.6796875, "learning_rate": 8.229068257489643e-05, "loss": 6.9786, "step": 10390 }, { "epoch": 0.5970949036418962, "grad_norm": 1.5546875, "learning_rate": 8.225517391217464e-05, "loss": 7.0033, "step": 10400 }, { "epoch": 0.5976690333569364, "grad_norm": 1.703125, "learning_rate": 8.221963736422929e-05, "loss": 6.974, "step": 10410 }, { "epoch": 0.5982431630719768, "grad_norm": 1.625, "learning_rate": 8.218407296178238e-05, "loss": 6.9626, "step": 10420 }, { "epoch": 0.598817292787017, "grad_norm": 1.7734375, "learning_rate": 8.214848073558006e-05, "loss": 6.9609, "step": 10430 }, { "epoch": 0.5993914225020573, "grad_norm": 1.5625, "learning_rate": 8.211286071639246e-05, "loss": 6.9778, "step": 10440 }, { "epoch": 0.5999655522170976, "grad_norm": 1.6875, "learning_rate": 8.207721293501383e-05, "loss": 6.9841, "step": 10450 }, { "epoch": 0.6005396819321379, "grad_norm": 1.5546875, "learning_rate": 8.20415374222623e-05, "loss": 6.9789, "step": 10460 }, { "epoch": 0.6011138116471781, "grad_norm": 1.6171875, "learning_rate": 8.200583420898012e-05, "loss": 6.9939, "step": 10470 }, { "epoch": 0.6016879413622185, "grad_norm": 1.6484375, "learning_rate": 8.197010332603336e-05, "loss": 6.9697, "step": 10480 }, { "epoch": 0.6022620710772587, "grad_norm": 1.5546875, "learning_rate": 8.193434480431206e-05, "loss": 6.975, "step": 10490 }, { "epoch": 0.602836200792299, "grad_norm": 1.5390625, "learning_rate": 8.189855867473018e-05, "loss": 7.0106, "step": 10500 }, { "epoch": 0.6034103305073393, "grad_norm": 1.609375, "learning_rate": 8.186274496822552e-05, "loss": 6.9875, "step": 10510 }, { "epoch": 0.6039844602223796, "grad_norm": 1.5859375, "learning_rate": 8.182690371575971e-05, "loss": 6.9691, "step": 10520 }, { "epoch": 0.6045585899374198, "grad_norm": 1.578125, "learning_rate": 8.179103494831821e-05, "loss": 6.9972, "step": 10530 }, { "epoch": 0.6051327196524602, "grad_norm": 1.65625, "learning_rate": 8.175513869691027e-05, "loss": 6.9925, "step": 10540 }, { "epoch": 0.6057068493675004, "grad_norm": 1.609375, "learning_rate": 8.171921499256891e-05, "loss": 6.9612, "step": 10550 }, { "epoch": 0.6062809790825407, "grad_norm": 1.578125, "learning_rate": 8.168326386635083e-05, "loss": 6.9736, "step": 10560 }, { "epoch": 0.606855108797581, "grad_norm": 1.6484375, "learning_rate": 8.164728534933653e-05, "loss": 6.9866, "step": 10570 }, { "epoch": 0.6074292385126213, "grad_norm": 1.7421875, "learning_rate": 8.161127947263007e-05, "loss": 7.0039, "step": 10580 }, { "epoch": 0.6080033682276615, "grad_norm": 1.5703125, "learning_rate": 8.15752462673593e-05, "loss": 7.0022, "step": 10590 }, { "epoch": 0.6085774979427019, "grad_norm": 1.6171875, "learning_rate": 8.153918576467558e-05, "loss": 7.0146, "step": 10600 }, { "epoch": 0.6091516276577421, "grad_norm": 1.59375, "learning_rate": 8.150309799575394e-05, "loss": 6.998, "step": 10610 }, { "epoch": 0.6097257573727825, "grad_norm": 1.5703125, "learning_rate": 8.146698299179291e-05, "loss": 6.9919, "step": 10620 }, { "epoch": 0.6102998870878227, "grad_norm": 1.671875, "learning_rate": 8.143084078401467e-05, "loss": 6.9656, "step": 10630 }, { "epoch": 0.610874016802863, "grad_norm": 1.59375, "learning_rate": 8.139467140366483e-05, "loss": 6.9782, "step": 10640 }, { "epoch": 0.6114481465179032, "grad_norm": 1.6328125, "learning_rate": 8.135847488201251e-05, "loss": 6.9906, "step": 10650 }, { "epoch": 0.6120222762329436, "grad_norm": 1.5, "learning_rate": 8.132225125035032e-05, "loss": 6.9975, "step": 10660 }, { "epoch": 0.6125964059479838, "grad_norm": 1.59375, "learning_rate": 8.128600053999431e-05, "loss": 6.9186, "step": 10670 }, { "epoch": 0.6131705356630242, "grad_norm": 1.5859375, "learning_rate": 8.124972278228389e-05, "loss": 6.9996, "step": 10680 }, { "epoch": 0.6137446653780644, "grad_norm": 1.6953125, "learning_rate": 8.121341800858189e-05, "loss": 6.9948, "step": 10690 }, { "epoch": 0.6143187950931047, "grad_norm": 1.625, "learning_rate": 8.117708625027451e-05, "loss": 6.972, "step": 10700 }, { "epoch": 0.614892924808145, "grad_norm": 1.53125, "learning_rate": 8.114072753877125e-05, "loss": 7.0159, "step": 10710 }, { "epoch": 0.6154670545231853, "grad_norm": 1.609375, "learning_rate": 8.110434190550493e-05, "loss": 6.9762, "step": 10720 }, { "epoch": 0.6160411842382255, "grad_norm": 1.59375, "learning_rate": 8.106792938193162e-05, "loss": 6.9775, "step": 10730 }, { "epoch": 0.6166153139532659, "grad_norm": 1.59375, "learning_rate": 8.103148999953065e-05, "loss": 6.9968, "step": 10740 }, { "epoch": 0.6171894436683061, "grad_norm": 1.625, "learning_rate": 8.099502378980459e-05, "loss": 6.9856, "step": 10750 }, { "epoch": 0.6177635733833464, "grad_norm": 1.5546875, "learning_rate": 8.095853078427918e-05, "loss": 7.0284, "step": 10760 }, { "epoch": 0.6183377030983866, "grad_norm": 1.6015625, "learning_rate": 8.092201101450332e-05, "loss": 6.9898, "step": 10770 }, { "epoch": 0.618911832813427, "grad_norm": 1.609375, "learning_rate": 8.088546451204909e-05, "loss": 6.9919, "step": 10780 }, { "epoch": 0.6194859625284672, "grad_norm": 1.578125, "learning_rate": 8.084889130851163e-05, "loss": 7.0162, "step": 10790 }, { "epoch": 0.6200600922435076, "grad_norm": 1.6171875, "learning_rate": 8.081229143550917e-05, "loss": 6.9979, "step": 10800 }, { "epoch": 0.6206342219585478, "grad_norm": 1.640625, "learning_rate": 8.077566492468302e-05, "loss": 6.9826, "step": 10810 }, { "epoch": 0.6212083516735881, "grad_norm": 1.515625, "learning_rate": 8.073901180769752e-05, "loss": 6.9901, "step": 10820 }, { "epoch": 0.6217824813886283, "grad_norm": 1.640625, "learning_rate": 8.070233211623999e-05, "loss": 6.9824, "step": 10830 }, { "epoch": 0.6223566111036687, "grad_norm": 1.65625, "learning_rate": 8.066562588202073e-05, "loss": 6.9176, "step": 10840 }, { "epoch": 0.622930740818709, "grad_norm": 1.6015625, "learning_rate": 8.062889313677302e-05, "loss": 6.9748, "step": 10850 }, { "epoch": 0.6235048705337493, "grad_norm": 1.609375, "learning_rate": 8.059213391225301e-05, "loss": 7.001, "step": 10860 }, { "epoch": 0.6240790002487896, "grad_norm": 1.734375, "learning_rate": 8.055534824023976e-05, "loss": 6.9788, "step": 10870 }, { "epoch": 0.6246531299638298, "grad_norm": 1.578125, "learning_rate": 8.05185361525352e-05, "loss": 6.9838, "step": 10880 }, { "epoch": 0.6252272596788702, "grad_norm": 1.53125, "learning_rate": 8.04816976809641e-05, "loss": 6.9763, "step": 10890 }, { "epoch": 0.6258013893939104, "grad_norm": 1.6015625, "learning_rate": 8.044483285737401e-05, "loss": 6.9989, "step": 10900 }, { "epoch": 0.6263755191089507, "grad_norm": 1.6328125, "learning_rate": 8.040794171363531e-05, "loss": 7.0087, "step": 10910 }, { "epoch": 0.626949648823991, "grad_norm": 1.578125, "learning_rate": 8.037102428164112e-05, "loss": 6.9784, "step": 10920 }, { "epoch": 0.6275237785390313, "grad_norm": 1.6796875, "learning_rate": 8.033408059330725e-05, "loss": 6.9524, "step": 10930 }, { "epoch": 0.6280979082540715, "grad_norm": 1.59375, "learning_rate": 8.029711068057224e-05, "loss": 6.9713, "step": 10940 }, { "epoch": 0.6286720379691119, "grad_norm": 1.6171875, "learning_rate": 8.02601145753973e-05, "loss": 6.9852, "step": 10950 }, { "epoch": 0.6292461676841521, "grad_norm": 1.625, "learning_rate": 8.022309230976628e-05, "loss": 7.0048, "step": 10960 }, { "epoch": 0.6298202973991924, "grad_norm": 1.5234375, "learning_rate": 8.018604391568564e-05, "loss": 6.982, "step": 10970 }, { "epoch": 0.6303944271142327, "grad_norm": 1.65625, "learning_rate": 8.014896942518446e-05, "loss": 6.9608, "step": 10980 }, { "epoch": 0.630968556829273, "grad_norm": 1.6015625, "learning_rate": 8.011186887031434e-05, "loss": 6.9604, "step": 10990 }, { "epoch": 0.6315426865443132, "grad_norm": 1.578125, "learning_rate": 8.007474228314942e-05, "loss": 6.9843, "step": 11000 }, { "epoch": 0.6321168162593536, "grad_norm": 1.6875, "learning_rate": 8.003758969578636e-05, "loss": 6.988, "step": 11010 }, { "epoch": 0.6326909459743938, "grad_norm": 1.625, "learning_rate": 8.000041114034431e-05, "loss": 6.9857, "step": 11020 }, { "epoch": 0.6332650756894341, "grad_norm": 1.5703125, "learning_rate": 7.996320664896483e-05, "loss": 6.9709, "step": 11030 }, { "epoch": 0.6338392054044744, "grad_norm": 1.65625, "learning_rate": 7.992597625381195e-05, "loss": 6.9667, "step": 11040 }, { "epoch": 0.6344133351195147, "grad_norm": 1.6640625, "learning_rate": 7.988871998707204e-05, "loss": 6.9538, "step": 11050 }, { "epoch": 0.6349874648345549, "grad_norm": 1.65625, "learning_rate": 7.985143788095389e-05, "loss": 6.9634, "step": 11060 }, { "epoch": 0.6355615945495953, "grad_norm": 1.5546875, "learning_rate": 7.981412996768858e-05, "loss": 6.9727, "step": 11070 }, { "epoch": 0.6361357242646355, "grad_norm": 1.5625, "learning_rate": 7.977679627952953e-05, "loss": 7.0057, "step": 11080 }, { "epoch": 0.6367098539796758, "grad_norm": 1.5390625, "learning_rate": 7.973943684875245e-05, "loss": 6.9734, "step": 11090 }, { "epoch": 0.6372839836947161, "grad_norm": 1.640625, "learning_rate": 7.970205170765528e-05, "loss": 6.9587, "step": 11100 }, { "epoch": 0.6378581134097564, "grad_norm": 1.7265625, "learning_rate": 7.966464088855822e-05, "loss": 6.9155, "step": 11110 }, { "epoch": 0.6384322431247966, "grad_norm": 1.59375, "learning_rate": 7.962720442380364e-05, "loss": 6.961, "step": 11120 }, { "epoch": 0.639006372839837, "grad_norm": 1.6171875, "learning_rate": 7.958974234575607e-05, "loss": 6.9626, "step": 11130 }, { "epoch": 0.6395805025548772, "grad_norm": 1.734375, "learning_rate": 7.955225468680223e-05, "loss": 6.9431, "step": 11140 }, { "epoch": 0.6401546322699175, "grad_norm": 1.6328125, "learning_rate": 7.951474147935091e-05, "loss": 6.9833, "step": 11150 }, { "epoch": 0.6407287619849578, "grad_norm": 1.59375, "learning_rate": 7.947720275583301e-05, "loss": 6.9595, "step": 11160 }, { "epoch": 0.6413028916999981, "grad_norm": 1.5625, "learning_rate": 7.943963854870149e-05, "loss": 6.9866, "step": 11170 }, { "epoch": 0.6418770214150383, "grad_norm": 1.6015625, "learning_rate": 7.940204889043135e-05, "loss": 6.9559, "step": 11180 }, { "epoch": 0.6424511511300787, "grad_norm": 1.578125, "learning_rate": 7.936443381351954e-05, "loss": 6.9778, "step": 11190 }, { "epoch": 0.6430252808451189, "grad_norm": 1.7109375, "learning_rate": 7.932679335048506e-05, "loss": 6.9584, "step": 11200 }, { "epoch": 0.6435994105601592, "grad_norm": 1.5859375, "learning_rate": 7.92891275338688e-05, "loss": 6.9656, "step": 11210 }, { "epoch": 0.6441735402751995, "grad_norm": 1.6328125, "learning_rate": 7.92514363962336e-05, "loss": 6.9864, "step": 11220 }, { "epoch": 0.6447476699902398, "grad_norm": 1.7890625, "learning_rate": 7.921371997016416e-05, "loss": 6.9844, "step": 11230 }, { "epoch": 0.64532179970528, "grad_norm": 1.6875, "learning_rate": 7.91759782882671e-05, "loss": 6.9804, "step": 11240 }, { "epoch": 0.6458959294203204, "grad_norm": 1.6953125, "learning_rate": 7.913821138317079e-05, "loss": 6.9777, "step": 11250 }, { "epoch": 0.6464700591353606, "grad_norm": 1.578125, "learning_rate": 7.91004192875255e-05, "loss": 6.974, "step": 11260 }, { "epoch": 0.6470441888504009, "grad_norm": 1.7421875, "learning_rate": 7.906260203400319e-05, "loss": 6.9603, "step": 11270 }, { "epoch": 0.6476183185654412, "grad_norm": 1.5859375, "learning_rate": 7.902475965529763e-05, "loss": 6.9737, "step": 11280 }, { "epoch": 0.6481924482804815, "grad_norm": 1.578125, "learning_rate": 7.898689218412427e-05, "loss": 6.9711, "step": 11290 }, { "epoch": 0.6487665779955218, "grad_norm": 1.703125, "learning_rate": 7.894899965322031e-05, "loss": 6.956, "step": 11300 }, { "epoch": 0.6493407077105621, "grad_norm": 1.6015625, "learning_rate": 7.891108209534455e-05, "loss": 6.9562, "step": 11310 }, { "epoch": 0.6499148374256024, "grad_norm": 1.796875, "learning_rate": 7.887313954327745e-05, "loss": 7.0049, "step": 11320 }, { "epoch": 0.6504889671406426, "grad_norm": 1.6484375, "learning_rate": 7.88351720298211e-05, "loss": 7.0021, "step": 11330 }, { "epoch": 0.651063096855683, "grad_norm": 1.6484375, "learning_rate": 7.879717958779915e-05, "loss": 6.9747, "step": 11340 }, { "epoch": 0.6516372265707232, "grad_norm": 1.6171875, "learning_rate": 7.87591622500568e-05, "loss": 6.9826, "step": 11350 }, { "epoch": 0.6522113562857635, "grad_norm": 1.640625, "learning_rate": 7.872112004946075e-05, "loss": 6.9759, "step": 11360 }, { "epoch": 0.6527854860008038, "grad_norm": 1.625, "learning_rate": 7.868305301889927e-05, "loss": 6.9862, "step": 11370 }, { "epoch": 0.6533596157158441, "grad_norm": 1.5, "learning_rate": 7.864496119128202e-05, "loss": 6.9906, "step": 11380 }, { "epoch": 0.6539337454308843, "grad_norm": 1.7109375, "learning_rate": 7.860684459954011e-05, "loss": 6.978, "step": 11390 }, { "epoch": 0.6545078751459247, "grad_norm": 1.65625, "learning_rate": 7.856870327662611e-05, "loss": 6.9735, "step": 11400 }, { "epoch": 0.6550820048609649, "grad_norm": 1.546875, "learning_rate": 7.853053725551389e-05, "loss": 6.9732, "step": 11410 }, { "epoch": 0.6556561345760052, "grad_norm": 1.5234375, "learning_rate": 7.849234656919875e-05, "loss": 7.0027, "step": 11420 }, { "epoch": 0.6562302642910455, "grad_norm": 1.6328125, "learning_rate": 7.845413125069727e-05, "loss": 6.9497, "step": 11430 }, { "epoch": 0.6568043940060858, "grad_norm": 1.5859375, "learning_rate": 7.841589133304732e-05, "loss": 6.9555, "step": 11440 }, { "epoch": 0.657378523721126, "grad_norm": 1.4921875, "learning_rate": 7.837762684930806e-05, "loss": 6.9795, "step": 11450 }, { "epoch": 0.6579526534361664, "grad_norm": 1.6640625, "learning_rate": 7.833933783255988e-05, "loss": 6.9998, "step": 11460 }, { "epoch": 0.6585267831512066, "grad_norm": 1.578125, "learning_rate": 7.83010243159044e-05, "loss": 7.0157, "step": 11470 }, { "epoch": 0.659100912866247, "grad_norm": 1.6015625, "learning_rate": 7.826268633246435e-05, "loss": 6.9558, "step": 11480 }, { "epoch": 0.6596750425812872, "grad_norm": 1.578125, "learning_rate": 7.822432391538371e-05, "loss": 6.9717, "step": 11490 }, { "epoch": 0.6602491722963275, "grad_norm": 1.6640625, "learning_rate": 7.818593709782749e-05, "loss": 6.9613, "step": 11500 }, { "epoch": 0.6608233020113677, "grad_norm": 1.6875, "learning_rate": 7.814752591298186e-05, "loss": 6.9635, "step": 11510 }, { "epoch": 0.6613974317264081, "grad_norm": 1.5234375, "learning_rate": 7.810909039405402e-05, "loss": 7.0205, "step": 11520 }, { "epoch": 0.6619715614414483, "grad_norm": 1.671875, "learning_rate": 7.807063057427226e-05, "loss": 6.9883, "step": 11530 }, { "epoch": 0.6625456911564886, "grad_norm": 1.609375, "learning_rate": 7.803214648688581e-05, "loss": 6.9535, "step": 11540 }, { "epoch": 0.6631198208715289, "grad_norm": 1.5859375, "learning_rate": 7.799363816516491e-05, "loss": 6.944, "step": 11550 }, { "epoch": 0.6636939505865692, "grad_norm": 1.5625, "learning_rate": 7.795510564240076e-05, "loss": 6.9636, "step": 11560 }, { "epoch": 0.6642680803016094, "grad_norm": 1.5546875, "learning_rate": 7.791654895190548e-05, "loss": 7.007, "step": 11570 }, { "epoch": 0.6648422100166498, "grad_norm": 1.5546875, "learning_rate": 7.787796812701204e-05, "loss": 6.9788, "step": 11580 }, { "epoch": 0.66541633973169, "grad_norm": 1.59375, "learning_rate": 7.783936320107437e-05, "loss": 6.9309, "step": 11590 }, { "epoch": 0.6659904694467303, "grad_norm": 1.625, "learning_rate": 7.780073420746712e-05, "loss": 6.9335, "step": 11600 }, { "epoch": 0.6665645991617706, "grad_norm": 1.6171875, "learning_rate": 7.776208117958585e-05, "loss": 6.9752, "step": 11610 }, { "epoch": 0.6671387288768109, "grad_norm": 1.5625, "learning_rate": 7.772340415084681e-05, "loss": 6.9545, "step": 11620 }, { "epoch": 0.6677128585918511, "grad_norm": 1.8125, "learning_rate": 7.768470315468707e-05, "loss": 6.9765, "step": 11630 }, { "epoch": 0.6682869883068915, "grad_norm": 1.5859375, "learning_rate": 7.76459782245644e-05, "loss": 6.9908, "step": 11640 }, { "epoch": 0.6688611180219317, "grad_norm": 1.5703125, "learning_rate": 7.760722939395724e-05, "loss": 6.9714, "step": 11650 }, { "epoch": 0.669435247736972, "grad_norm": 1.59375, "learning_rate": 7.756845669636469e-05, "loss": 6.9427, "step": 11660 }, { "epoch": 0.6700093774520123, "grad_norm": 1.671875, "learning_rate": 7.752966016530652e-05, "loss": 6.9655, "step": 11670 }, { "epoch": 0.6705835071670526, "grad_norm": 1.7265625, "learning_rate": 7.749083983432308e-05, "loss": 7.0029, "step": 11680 }, { "epoch": 0.6711576368820928, "grad_norm": 1.6875, "learning_rate": 7.74519957369753e-05, "loss": 6.9661, "step": 11690 }, { "epoch": 0.6717317665971332, "grad_norm": 1.5703125, "learning_rate": 7.741312790684465e-05, "loss": 6.9679, "step": 11700 }, { "epoch": 0.6723058963121734, "grad_norm": 1.6328125, "learning_rate": 7.737423637753313e-05, "loss": 6.9563, "step": 11710 }, { "epoch": 0.6728800260272138, "grad_norm": 1.59375, "learning_rate": 7.73353211826632e-05, "loss": 6.9657, "step": 11720 }, { "epoch": 0.673454155742254, "grad_norm": 1.6640625, "learning_rate": 7.729638235587783e-05, "loss": 6.9761, "step": 11730 }, { "epoch": 0.6740282854572943, "grad_norm": 1.6484375, "learning_rate": 7.72574199308404e-05, "loss": 6.9707, "step": 11740 }, { "epoch": 0.6746024151723347, "grad_norm": 1.546875, "learning_rate": 7.721843394123465e-05, "loss": 6.9245, "step": 11750 }, { "epoch": 0.6751765448873749, "grad_norm": 1.6484375, "learning_rate": 7.717942442076473e-05, "loss": 6.9743, "step": 11760 }, { "epoch": 0.6757506746024152, "grad_norm": 1.59375, "learning_rate": 7.714039140315514e-05, "loss": 6.9635, "step": 11770 }, { "epoch": 0.6763248043174555, "grad_norm": 1.609375, "learning_rate": 7.710133492215066e-05, "loss": 6.9753, "step": 11780 }, { "epoch": 0.6768989340324958, "grad_norm": 1.5703125, "learning_rate": 7.706225501151641e-05, "loss": 6.9818, "step": 11790 }, { "epoch": 0.677473063747536, "grad_norm": 1.5078125, "learning_rate": 7.702315170503769e-05, "loss": 6.9832, "step": 11800 }, { "epoch": 0.6780471934625764, "grad_norm": 1.6328125, "learning_rate": 7.69840250365201e-05, "loss": 7.0003, "step": 11810 }, { "epoch": 0.6786213231776166, "grad_norm": 1.6328125, "learning_rate": 7.69448750397894e-05, "loss": 6.9929, "step": 11820 }, { "epoch": 0.6791954528926569, "grad_norm": 1.515625, "learning_rate": 7.690570174869149e-05, "loss": 6.97, "step": 11830 }, { "epoch": 0.6797695826076972, "grad_norm": 1.515625, "learning_rate": 7.686650519709249e-05, "loss": 6.9556, "step": 11840 }, { "epoch": 0.6803437123227375, "grad_norm": 1.6015625, "learning_rate": 7.682728541887854e-05, "loss": 6.9593, "step": 11850 }, { "epoch": 0.6809178420377777, "grad_norm": 1.59375, "learning_rate": 7.678804244795593e-05, "loss": 6.9603, "step": 11860 }, { "epoch": 0.6814919717528181, "grad_norm": 1.5859375, "learning_rate": 7.674877631825093e-05, "loss": 6.9669, "step": 11870 }, { "epoch": 0.6820661014678583, "grad_norm": 1.6015625, "learning_rate": 7.670948706370988e-05, "loss": 6.9686, "step": 11880 }, { "epoch": 0.6826402311828986, "grad_norm": 1.6328125, "learning_rate": 7.667017471829914e-05, "loss": 6.9484, "step": 11890 }, { "epoch": 0.6832143608979389, "grad_norm": 1.59375, "learning_rate": 7.663083931600497e-05, "loss": 6.944, "step": 11900 }, { "epoch": 0.6837884906129792, "grad_norm": 1.6875, "learning_rate": 7.659148089083357e-05, "loss": 6.9765, "step": 11910 }, { "epoch": 0.6843626203280194, "grad_norm": 1.5625, "learning_rate": 7.65520994768111e-05, "loss": 7.0007, "step": 11920 }, { "epoch": 0.6849367500430598, "grad_norm": 1.5703125, "learning_rate": 7.651269510798353e-05, "loss": 6.9465, "step": 11930 }, { "epoch": 0.6855108797581, "grad_norm": 1.7421875, "learning_rate": 7.64732678184167e-05, "loss": 6.9472, "step": 11940 }, { "epoch": 0.6860850094731403, "grad_norm": 1.6640625, "learning_rate": 7.64338176421963e-05, "loss": 6.9246, "step": 11950 }, { "epoch": 0.6866591391881806, "grad_norm": 1.484375, "learning_rate": 7.639434461342773e-05, "loss": 6.9619, "step": 11960 }, { "epoch": 0.6872332689032209, "grad_norm": 1.546875, "learning_rate": 7.63548487662362e-05, "loss": 6.9673, "step": 11970 }, { "epoch": 0.6878073986182611, "grad_norm": 1.6953125, "learning_rate": 7.631533013476665e-05, "loss": 6.9765, "step": 11980 }, { "epoch": 0.6883815283333015, "grad_norm": 1.6171875, "learning_rate": 7.627578875318372e-05, "loss": 6.9593, "step": 11990 }, { "epoch": 0.6889556580483417, "grad_norm": 1.5546875, "learning_rate": 7.623622465567166e-05, "loss": 6.9462, "step": 12000 }, { "epoch": 0.689529787763382, "grad_norm": 1.53125, "learning_rate": 7.619663787643441e-05, "loss": 6.9357, "step": 12010 }, { "epoch": 0.6901039174784223, "grad_norm": 1.6015625, "learning_rate": 7.615702844969553e-05, "loss": 6.9577, "step": 12020 }, { "epoch": 0.6906780471934626, "grad_norm": 1.6015625, "learning_rate": 7.611739640969813e-05, "loss": 6.9309, "step": 12030 }, { "epoch": 0.6912521769085028, "grad_norm": 1.65625, "learning_rate": 7.607774179070485e-05, "loss": 6.9118, "step": 12040 }, { "epoch": 0.6918263066235432, "grad_norm": 1.6484375, "learning_rate": 7.603806462699792e-05, "loss": 6.976, "step": 12050 }, { "epoch": 0.6924004363385834, "grad_norm": 1.9375, "learning_rate": 7.599836495287898e-05, "loss": 6.9466, "step": 12060 }, { "epoch": 0.6929745660536237, "grad_norm": 1.5546875, "learning_rate": 7.59586428026692e-05, "loss": 6.9853, "step": 12070 }, { "epoch": 0.693548695768664, "grad_norm": 1.6796875, "learning_rate": 7.591889821070913e-05, "loss": 6.9509, "step": 12080 }, { "epoch": 0.6941228254837043, "grad_norm": 1.5625, "learning_rate": 7.587913121135875e-05, "loss": 6.9566, "step": 12090 }, { "epoch": 0.6946969551987445, "grad_norm": 1.515625, "learning_rate": 7.583934183899738e-05, "loss": 6.9765, "step": 12100 }, { "epoch": 0.6952710849137849, "grad_norm": 1.515625, "learning_rate": 7.579953012802374e-05, "loss": 6.9199, "step": 12110 }, { "epoch": 0.6958452146288251, "grad_norm": 1.6484375, "learning_rate": 7.57596961128558e-05, "loss": 6.9372, "step": 12120 }, { "epoch": 0.6964193443438654, "grad_norm": 1.6484375, "learning_rate": 7.571983982793086e-05, "loss": 6.9888, "step": 12130 }, { "epoch": 0.6969934740589057, "grad_norm": 1.75, "learning_rate": 7.567996130770543e-05, "loss": 6.9627, "step": 12140 }, { "epoch": 0.697567603773946, "grad_norm": 1.59375, "learning_rate": 7.564006058665525e-05, "loss": 6.9265, "step": 12150 }, { "epoch": 0.6981417334889862, "grad_norm": 1.59375, "learning_rate": 7.560013769927532e-05, "loss": 6.9392, "step": 12160 }, { "epoch": 0.6987158632040266, "grad_norm": 1.796875, "learning_rate": 7.556019268007972e-05, "loss": 6.9563, "step": 12170 }, { "epoch": 0.6992899929190668, "grad_norm": 1.625, "learning_rate": 7.55202255636017e-05, "loss": 6.9561, "step": 12180 }, { "epoch": 0.6998641226341071, "grad_norm": 1.6328125, "learning_rate": 7.548023638439359e-05, "loss": 6.9558, "step": 12190 }, { "epoch": 0.7004382523491475, "grad_norm": 1.5859375, "learning_rate": 7.544022517702684e-05, "loss": 6.9474, "step": 12200 }, { "epoch": 0.7010123820641877, "grad_norm": 1.609375, "learning_rate": 7.54001919760919e-05, "loss": 6.973, "step": 12210 }, { "epoch": 0.701586511779228, "grad_norm": 1.625, "learning_rate": 7.536013681619822e-05, "loss": 6.9548, "step": 12220 }, { "epoch": 0.7021606414942683, "grad_norm": 1.5546875, "learning_rate": 7.532005973197431e-05, "loss": 6.9928, "step": 12230 }, { "epoch": 0.7027347712093086, "grad_norm": 1.640625, "learning_rate": 7.527996075806757e-05, "loss": 6.9436, "step": 12240 }, { "epoch": 0.7033089009243488, "grad_norm": 1.515625, "learning_rate": 7.523983992914435e-05, "loss": 6.9541, "step": 12250 }, { "epoch": 0.7038830306393892, "grad_norm": 1.5546875, "learning_rate": 7.519969727988984e-05, "loss": 6.9461, "step": 12260 }, { "epoch": 0.7044571603544294, "grad_norm": 1.578125, "learning_rate": 7.51595328450082e-05, "loss": 6.9568, "step": 12270 }, { "epoch": 0.7050312900694697, "grad_norm": 1.6484375, "learning_rate": 7.511934665922232e-05, "loss": 7.0033, "step": 12280 }, { "epoch": 0.70560541978451, "grad_norm": 1.8984375, "learning_rate": 7.507913875727397e-05, "loss": 6.9849, "step": 12290 }, { "epoch": 0.7061795494995503, "grad_norm": 1.6015625, "learning_rate": 7.503890917392361e-05, "loss": 6.9881, "step": 12300 }, { "epoch": 0.7067536792145905, "grad_norm": 1.546875, "learning_rate": 7.499865794395057e-05, "loss": 6.9328, "step": 12310 }, { "epoch": 0.7073278089296309, "grad_norm": 1.5390625, "learning_rate": 7.495838510215276e-05, "loss": 6.9288, "step": 12320 }, { "epoch": 0.7079019386446711, "grad_norm": 1.65625, "learning_rate": 7.491809068334685e-05, "loss": 6.9334, "step": 12330 }, { "epoch": 0.7084760683597114, "grad_norm": 1.609375, "learning_rate": 7.487777472236815e-05, "loss": 6.9809, "step": 12340 }, { "epoch": 0.7090501980747517, "grad_norm": 1.75, "learning_rate": 7.48374372540706e-05, "loss": 6.9187, "step": 12350 }, { "epoch": 0.709624327789792, "grad_norm": 1.6015625, "learning_rate": 7.47970783133267e-05, "loss": 6.9398, "step": 12360 }, { "epoch": 0.7101984575048322, "grad_norm": 1.6328125, "learning_rate": 7.475669793502755e-05, "loss": 6.9453, "step": 12370 }, { "epoch": 0.7107725872198726, "grad_norm": 1.609375, "learning_rate": 7.471629615408278e-05, "loss": 6.9427, "step": 12380 }, { "epoch": 0.7113467169349128, "grad_norm": 1.5859375, "learning_rate": 7.467587300542049e-05, "loss": 6.9699, "step": 12390 }, { "epoch": 0.7119208466499531, "grad_norm": 1.6171875, "learning_rate": 7.463542852398728e-05, "loss": 6.9523, "step": 12400 }, { "epoch": 0.7124949763649934, "grad_norm": 1.5703125, "learning_rate": 7.459496274474822e-05, "loss": 6.9642, "step": 12410 }, { "epoch": 0.7130691060800337, "grad_norm": 1.609375, "learning_rate": 7.455447570268673e-05, "loss": 6.9485, "step": 12420 }, { "epoch": 0.7136432357950739, "grad_norm": 1.6640625, "learning_rate": 7.451396743280465e-05, "loss": 6.922, "step": 12430 }, { "epoch": 0.7142173655101143, "grad_norm": 1.625, "learning_rate": 7.447343797012218e-05, "loss": 6.9302, "step": 12440 }, { "epoch": 0.7147914952251545, "grad_norm": 1.65625, "learning_rate": 7.443288734967782e-05, "loss": 6.9553, "step": 12450 }, { "epoch": 0.7153656249401948, "grad_norm": 1.5859375, "learning_rate": 7.439231560652834e-05, "loss": 6.9574, "step": 12460 }, { "epoch": 0.7159397546552351, "grad_norm": 1.6171875, "learning_rate": 7.435172277574885e-05, "loss": 6.9633, "step": 12470 }, { "epoch": 0.7165138843702754, "grad_norm": 1.6015625, "learning_rate": 7.431110889243259e-05, "loss": 6.9022, "step": 12480 }, { "epoch": 0.7170880140853156, "grad_norm": 1.6015625, "learning_rate": 7.427047399169108e-05, "loss": 6.9603, "step": 12490 }, { "epoch": 0.717662143800356, "grad_norm": 1.59375, "learning_rate": 7.422981810865397e-05, "loss": 6.9143, "step": 12500 }, { "epoch": 0.7182362735153962, "grad_norm": 1.6328125, "learning_rate": 7.418914127846906e-05, "loss": 6.917, "step": 12510 }, { "epoch": 0.7188104032304365, "grad_norm": 1.484375, "learning_rate": 7.414844353630226e-05, "loss": 6.9381, "step": 12520 }, { "epoch": 0.7193845329454768, "grad_norm": 1.5859375, "learning_rate": 7.410772491733756e-05, "loss": 6.9498, "step": 12530 }, { "epoch": 0.7199586626605171, "grad_norm": 1.4375, "learning_rate": 7.406698545677698e-05, "loss": 6.9534, "step": 12540 }, { "epoch": 0.7205327923755573, "grad_norm": 1.5390625, "learning_rate": 7.40262251898406e-05, "loss": 6.9452, "step": 12550 }, { "epoch": 0.7211069220905977, "grad_norm": 1.46875, "learning_rate": 7.398544415176645e-05, "loss": 6.9431, "step": 12560 }, { "epoch": 0.7216810518056379, "grad_norm": 1.5859375, "learning_rate": 7.394464237781053e-05, "loss": 6.9655, "step": 12570 }, { "epoch": 0.7222551815206782, "grad_norm": 1.671875, "learning_rate": 7.390381990324674e-05, "loss": 6.9516, "step": 12580 }, { "epoch": 0.7228293112357185, "grad_norm": 1.5859375, "learning_rate": 7.386297676336696e-05, "loss": 6.9533, "step": 12590 }, { "epoch": 0.7234034409507588, "grad_norm": 1.6171875, "learning_rate": 7.382211299348081e-05, "loss": 6.9604, "step": 12600 }, { "epoch": 0.723977570665799, "grad_norm": 1.6796875, "learning_rate": 7.378122862891585e-05, "loss": 6.9449, "step": 12610 }, { "epoch": 0.7245517003808394, "grad_norm": 1.5625, "learning_rate": 7.37403237050174e-05, "loss": 6.9143, "step": 12620 }, { "epoch": 0.7251258300958796, "grad_norm": 1.5546875, "learning_rate": 7.369939825714856e-05, "loss": 6.9502, "step": 12630 }, { "epoch": 0.72569995981092, "grad_norm": 1.578125, "learning_rate": 7.365845232069019e-05, "loss": 6.9382, "step": 12640 }, { "epoch": 0.7262740895259603, "grad_norm": 1.5546875, "learning_rate": 7.36174859310408e-05, "loss": 6.9101, "step": 12650 }, { "epoch": 0.7268482192410005, "grad_norm": 1.6875, "learning_rate": 7.357649912361668e-05, "loss": 6.9055, "step": 12660 }, { "epoch": 0.7274223489560409, "grad_norm": 1.5234375, "learning_rate": 7.353549193385168e-05, "loss": 6.9336, "step": 12670 }, { "epoch": 0.7279964786710811, "grad_norm": 1.546875, "learning_rate": 7.349446439719734e-05, "loss": 6.9425, "step": 12680 }, { "epoch": 0.7285706083861214, "grad_norm": 1.734375, "learning_rate": 7.345341654912274e-05, "loss": 6.9389, "step": 12690 }, { "epoch": 0.7291447381011616, "grad_norm": 1.6328125, "learning_rate": 7.341234842511456e-05, "loss": 6.9289, "step": 12700 }, { "epoch": 0.729718867816202, "grad_norm": 1.546875, "learning_rate": 7.337126006067699e-05, "loss": 6.9337, "step": 12710 }, { "epoch": 0.7302929975312422, "grad_norm": 1.609375, "learning_rate": 7.333015149133169e-05, "loss": 6.9392, "step": 12720 }, { "epoch": 0.7308671272462826, "grad_norm": 1.5546875, "learning_rate": 7.328902275261785e-05, "loss": 6.9751, "step": 12730 }, { "epoch": 0.7314412569613228, "grad_norm": 1.5078125, "learning_rate": 7.324787388009204e-05, "loss": 6.9742, "step": 12740 }, { "epoch": 0.7320153866763631, "grad_norm": 1.6328125, "learning_rate": 7.320670490932827e-05, "loss": 6.9776, "step": 12750 }, { "epoch": 0.7325895163914034, "grad_norm": 1.5625, "learning_rate": 7.31655158759179e-05, "loss": 6.9609, "step": 12760 }, { "epoch": 0.7331636461064437, "grad_norm": 1.640625, "learning_rate": 7.312430681546966e-05, "loss": 6.9548, "step": 12770 }, { "epoch": 0.7337377758214839, "grad_norm": 1.5625, "learning_rate": 7.308307776360959e-05, "loss": 6.9416, "step": 12780 }, { "epoch": 0.7343119055365243, "grad_norm": 1.6328125, "learning_rate": 7.3041828755981e-05, "loss": 6.9397, "step": 12790 }, { "epoch": 0.7348860352515645, "grad_norm": 1.578125, "learning_rate": 7.300055982824443e-05, "loss": 6.938, "step": 12800 }, { "epoch": 0.7354601649666048, "grad_norm": 1.6484375, "learning_rate": 7.295927101607771e-05, "loss": 6.9473, "step": 12810 }, { "epoch": 0.736034294681645, "grad_norm": 1.671875, "learning_rate": 7.29179623551758e-05, "loss": 6.9411, "step": 12820 }, { "epoch": 0.7366084243966854, "grad_norm": 1.6171875, "learning_rate": 7.287663388125083e-05, "loss": 6.9525, "step": 12830 }, { "epoch": 0.7371825541117256, "grad_norm": 1.578125, "learning_rate": 7.283528563003208e-05, "loss": 6.9232, "step": 12840 }, { "epoch": 0.737756683826766, "grad_norm": 1.5625, "learning_rate": 7.27939176372659e-05, "loss": 6.9248, "step": 12850 }, { "epoch": 0.7383308135418062, "grad_norm": 1.5703125, "learning_rate": 7.275252993871576e-05, "loss": 6.9764, "step": 12860 }, { "epoch": 0.7389049432568465, "grad_norm": 1.625, "learning_rate": 7.27111225701621e-05, "loss": 6.9333, "step": 12870 }, { "epoch": 0.7394790729718868, "grad_norm": 1.5546875, "learning_rate": 7.266969556740239e-05, "loss": 6.9455, "step": 12880 }, { "epoch": 0.7400532026869271, "grad_norm": 1.59375, "learning_rate": 7.262824896625107e-05, "loss": 6.919, "step": 12890 }, { "epoch": 0.7406273324019673, "grad_norm": 1.5546875, "learning_rate": 7.258678280253954e-05, "loss": 6.9078, "step": 12900 }, { "epoch": 0.7412014621170077, "grad_norm": 1.5859375, "learning_rate": 7.254529711211612e-05, "loss": 6.9439, "step": 12910 }, { "epoch": 0.7417755918320479, "grad_norm": 1.6171875, "learning_rate": 7.2503791930846e-05, "loss": 6.9729, "step": 12920 }, { "epoch": 0.7423497215470882, "grad_norm": 1.65625, "learning_rate": 7.246226729461117e-05, "loss": 6.9428, "step": 12930 }, { "epoch": 0.7429238512621285, "grad_norm": 1.625, "learning_rate": 7.242072323931051e-05, "loss": 6.9696, "step": 12940 }, { "epoch": 0.7434979809771688, "grad_norm": 1.671875, "learning_rate": 7.237915980085966e-05, "loss": 6.9893, "step": 12950 }, { "epoch": 0.744072110692209, "grad_norm": 1.65625, "learning_rate": 7.233757701519103e-05, "loss": 6.9275, "step": 12960 }, { "epoch": 0.7446462404072494, "grad_norm": 1.5234375, "learning_rate": 7.229597491825374e-05, "loss": 6.9448, "step": 12970 }, { "epoch": 0.7452203701222896, "grad_norm": 1.6171875, "learning_rate": 7.22543535460136e-05, "loss": 6.9222, "step": 12980 }, { "epoch": 0.7457944998373299, "grad_norm": 1.65625, "learning_rate": 7.221271293445308e-05, "loss": 6.9139, "step": 12990 }, { "epoch": 0.7463686295523702, "grad_norm": 1.53125, "learning_rate": 7.217105311957135e-05, "loss": 6.9575, "step": 13000 }, { "epoch": 0.7469427592674105, "grad_norm": 1.65625, "learning_rate": 7.212937413738408e-05, "loss": 6.9712, "step": 13010 }, { "epoch": 0.7475168889824507, "grad_norm": 1.5625, "learning_rate": 7.208767602392354e-05, "loss": 6.9427, "step": 13020 }, { "epoch": 0.7480910186974911, "grad_norm": 1.6640625, "learning_rate": 7.204595881523862e-05, "loss": 6.8905, "step": 13030 }, { "epoch": 0.7486651484125313, "grad_norm": 1.625, "learning_rate": 7.200422254739463e-05, "loss": 6.9344, "step": 13040 }, { "epoch": 0.7492392781275716, "grad_norm": 1.6328125, "learning_rate": 7.196246725647338e-05, "loss": 6.9794, "step": 13050 }, { "epoch": 0.7498134078426119, "grad_norm": 1.59375, "learning_rate": 7.19206929785731e-05, "loss": 6.9557, "step": 13060 }, { "epoch": 0.7503875375576522, "grad_norm": 1.6171875, "learning_rate": 7.187889974980852e-05, "loss": 6.9585, "step": 13070 }, { "epoch": 0.7509616672726924, "grad_norm": 1.6328125, "learning_rate": 7.183708760631064e-05, "loss": 6.9335, "step": 13080 }, { "epoch": 0.7515357969877328, "grad_norm": 1.5703125, "learning_rate": 7.179525658422693e-05, "loss": 6.9735, "step": 13090 }, { "epoch": 0.7521099267027731, "grad_norm": 1.6328125, "learning_rate": 7.175340671972108e-05, "loss": 6.9229, "step": 13100 }, { "epoch": 0.7526840564178133, "grad_norm": 1.53125, "learning_rate": 7.17115380489731e-05, "loss": 6.952, "step": 13110 }, { "epoch": 0.7532581861328537, "grad_norm": 1.7109375, "learning_rate": 7.166965060817929e-05, "loss": 6.942, "step": 13120 }, { "epoch": 0.7538323158478939, "grad_norm": 1.5859375, "learning_rate": 7.162774443355218e-05, "loss": 6.9544, "step": 13130 }, { "epoch": 0.7544064455629342, "grad_norm": 1.65625, "learning_rate": 7.158581956132042e-05, "loss": 6.9458, "step": 13140 }, { "epoch": 0.7549805752779745, "grad_norm": 1.6015625, "learning_rate": 7.154387602772889e-05, "loss": 6.9283, "step": 13150 }, { "epoch": 0.7555547049930148, "grad_norm": 1.6171875, "learning_rate": 7.150191386903861e-05, "loss": 6.9262, "step": 13160 }, { "epoch": 0.756128834708055, "grad_norm": 1.6796875, "learning_rate": 7.145993312152666e-05, "loss": 6.9766, "step": 13170 }, { "epoch": 0.7567029644230954, "grad_norm": 1.5390625, "learning_rate": 7.141793382148621e-05, "loss": 6.9395, "step": 13180 }, { "epoch": 0.7572770941381356, "grad_norm": 1.6328125, "learning_rate": 7.137591600522649e-05, "loss": 6.9313, "step": 13190 }, { "epoch": 0.7578512238531759, "grad_norm": 1.59375, "learning_rate": 7.133387970907268e-05, "loss": 6.9541, "step": 13200 }, { "epoch": 0.7584253535682162, "grad_norm": 1.6171875, "learning_rate": 7.129182496936602e-05, "loss": 6.9298, "step": 13210 }, { "epoch": 0.7589994832832565, "grad_norm": 1.59375, "learning_rate": 7.124975182246361e-05, "loss": 6.9438, "step": 13220 }, { "epoch": 0.7595736129982967, "grad_norm": 1.65625, "learning_rate": 7.120766030473854e-05, "loss": 6.9108, "step": 13230 }, { "epoch": 0.7601477427133371, "grad_norm": 1.5390625, "learning_rate": 7.116555045257969e-05, "loss": 6.9529, "step": 13240 }, { "epoch": 0.7607218724283773, "grad_norm": 1.5859375, "learning_rate": 7.11234223023919e-05, "loss": 6.9413, "step": 13250 }, { "epoch": 0.7612960021434176, "grad_norm": 1.640625, "learning_rate": 7.108127589059573e-05, "loss": 6.9513, "step": 13260 }, { "epoch": 0.7618701318584579, "grad_norm": 1.515625, "learning_rate": 7.103911125362762e-05, "loss": 6.9433, "step": 13270 }, { "epoch": 0.7624442615734982, "grad_norm": 1.53125, "learning_rate": 7.099692842793964e-05, "loss": 6.9542, "step": 13280 }, { "epoch": 0.7630183912885384, "grad_norm": 1.5859375, "learning_rate": 7.095472744999973e-05, "loss": 6.9232, "step": 13290 }, { "epoch": 0.7635925210035788, "grad_norm": 1.609375, "learning_rate": 7.091250835629143e-05, "loss": 6.8932, "step": 13300 }, { "epoch": 0.764166650718619, "grad_norm": 1.59375, "learning_rate": 7.087027118331397e-05, "loss": 6.9483, "step": 13310 }, { "epoch": 0.7647407804336593, "grad_norm": 1.625, "learning_rate": 7.082801596758219e-05, "loss": 6.9503, "step": 13320 }, { "epoch": 0.7653149101486996, "grad_norm": 1.546875, "learning_rate": 7.078574274562657e-05, "loss": 6.9326, "step": 13330 }, { "epoch": 0.7658890398637399, "grad_norm": 1.6171875, "learning_rate": 7.07434515539931e-05, "loss": 6.9463, "step": 13340 }, { "epoch": 0.7664631695787801, "grad_norm": 1.625, "learning_rate": 7.070114242924337e-05, "loss": 6.9759, "step": 13350 }, { "epoch": 0.7670372992938205, "grad_norm": 1.6640625, "learning_rate": 7.06588154079544e-05, "loss": 6.903, "step": 13360 }, { "epoch": 0.7676114290088607, "grad_norm": 1.484375, "learning_rate": 7.061647052671873e-05, "loss": 6.9557, "step": 13370 }, { "epoch": 0.768185558723901, "grad_norm": 1.6171875, "learning_rate": 7.057410782214438e-05, "loss": 6.9652, "step": 13380 }, { "epoch": 0.7687596884389413, "grad_norm": 1.578125, "learning_rate": 7.053172733085466e-05, "loss": 6.9292, "step": 13390 }, { "epoch": 0.7693338181539816, "grad_norm": 1.59375, "learning_rate": 7.048932908948839e-05, "loss": 6.9163, "step": 13400 }, { "epoch": 0.7699079478690218, "grad_norm": 1.6015625, "learning_rate": 7.04469131346996e-05, "loss": 6.9463, "step": 13410 }, { "epoch": 0.7704820775840622, "grad_norm": 1.5859375, "learning_rate": 7.040447950315779e-05, "loss": 6.9376, "step": 13420 }, { "epoch": 0.7710562072991024, "grad_norm": 1.53125, "learning_rate": 7.03620282315476e-05, "loss": 6.9563, "step": 13430 }, { "epoch": 0.7716303370141427, "grad_norm": 1.59375, "learning_rate": 7.031955935656899e-05, "loss": 6.938, "step": 13440 }, { "epoch": 0.772204466729183, "grad_norm": 1.6015625, "learning_rate": 7.027707291493711e-05, "loss": 6.9266, "step": 13450 }, { "epoch": 0.7727785964442233, "grad_norm": 1.6875, "learning_rate": 7.023456894338235e-05, "loss": 6.9203, "step": 13460 }, { "epoch": 0.7733527261592635, "grad_norm": 1.6015625, "learning_rate": 7.01920474786502e-05, "loss": 6.8945, "step": 13470 }, { "epoch": 0.7739268558743039, "grad_norm": 1.6171875, "learning_rate": 7.01495085575013e-05, "loss": 6.9758, "step": 13480 }, { "epoch": 0.7745009855893441, "grad_norm": 1.640625, "learning_rate": 7.010695221671135e-05, "loss": 6.9387, "step": 13490 }, { "epoch": 0.7750751153043844, "grad_norm": 1.65625, "learning_rate": 7.006437849307115e-05, "loss": 6.9299, "step": 13500 }, { "epoch": 0.7756492450194247, "grad_norm": 2.15625, "learning_rate": 7.00217874233865e-05, "loss": 6.9433, "step": 13510 }, { "epoch": 0.776223374734465, "grad_norm": 1.59375, "learning_rate": 6.997917904447823e-05, "loss": 6.9512, "step": 13520 }, { "epoch": 0.7767975044495052, "grad_norm": 1.6484375, "learning_rate": 6.993655339318208e-05, "loss": 6.9717, "step": 13530 }, { "epoch": 0.7773716341645456, "grad_norm": 1.5625, "learning_rate": 6.989391050634877e-05, "loss": 6.9223, "step": 13540 }, { "epoch": 0.7779457638795859, "grad_norm": 1.6640625, "learning_rate": 6.985125042084388e-05, "loss": 6.9604, "step": 13550 }, { "epoch": 0.7785198935946261, "grad_norm": 1.6171875, "learning_rate": 6.980857317354792e-05, "loss": 6.9727, "step": 13560 }, { "epoch": 0.7790940233096665, "grad_norm": 1.5546875, "learning_rate": 6.976587880135617e-05, "loss": 6.9454, "step": 13570 }, { "epoch": 0.7796681530247067, "grad_norm": 1.671875, "learning_rate": 6.972316734117874e-05, "loss": 6.9135, "step": 13580 }, { "epoch": 0.780242282739747, "grad_norm": 1.6171875, "learning_rate": 6.968043882994054e-05, "loss": 6.9029, "step": 13590 }, { "epoch": 0.7808164124547873, "grad_norm": 1.6015625, "learning_rate": 6.963769330458117e-05, "loss": 6.9762, "step": 13600 }, { "epoch": 0.7813905421698276, "grad_norm": 1.609375, "learning_rate": 6.959493080205499e-05, "loss": 6.9443, "step": 13610 }, { "epoch": 0.7819646718848678, "grad_norm": 1.5390625, "learning_rate": 6.9552151359331e-05, "loss": 6.9077, "step": 13620 }, { "epoch": 0.7825388015999082, "grad_norm": 1.6328125, "learning_rate": 6.950935501339284e-05, "loss": 6.9295, "step": 13630 }, { "epoch": 0.7831129313149484, "grad_norm": 1.5546875, "learning_rate": 6.946654180123883e-05, "loss": 6.9423, "step": 13640 }, { "epoch": 0.7836870610299888, "grad_norm": 1.6015625, "learning_rate": 6.942371175988178e-05, "loss": 6.9018, "step": 13650 }, { "epoch": 0.784261190745029, "grad_norm": 1.65625, "learning_rate": 6.93808649263491e-05, "loss": 6.9466, "step": 13660 }, { "epoch": 0.7848353204600693, "grad_norm": 1.6953125, "learning_rate": 6.933800133768274e-05, "loss": 6.9222, "step": 13670 }, { "epoch": 0.7854094501751095, "grad_norm": 1.6640625, "learning_rate": 6.929512103093905e-05, "loss": 6.892, "step": 13680 }, { "epoch": 0.7859835798901499, "grad_norm": 1.6328125, "learning_rate": 6.925222404318892e-05, "loss": 6.9525, "step": 13690 }, { "epoch": 0.7865577096051901, "grad_norm": 1.59375, "learning_rate": 6.920931041151764e-05, "loss": 6.9537, "step": 13700 }, { "epoch": 0.7871318393202305, "grad_norm": 1.6328125, "learning_rate": 6.916638017302484e-05, "loss": 6.9071, "step": 13710 }, { "epoch": 0.7877059690352707, "grad_norm": 1.578125, "learning_rate": 6.912343336482456e-05, "loss": 6.9564, "step": 13720 }, { "epoch": 0.788280098750311, "grad_norm": 1.578125, "learning_rate": 6.908047002404517e-05, "loss": 6.937, "step": 13730 }, { "epoch": 0.7888542284653512, "grad_norm": 1.640625, "learning_rate": 6.903749018782928e-05, "loss": 6.983, "step": 13740 }, { "epoch": 0.7894283581803916, "grad_norm": 1.625, "learning_rate": 6.899449389333382e-05, "loss": 6.9388, "step": 13750 }, { "epoch": 0.7900024878954318, "grad_norm": 1.65625, "learning_rate": 6.89514811777299e-05, "loss": 6.9097, "step": 13760 }, { "epoch": 0.7905766176104722, "grad_norm": 1.546875, "learning_rate": 6.890845207820286e-05, "loss": 6.9503, "step": 13770 }, { "epoch": 0.7911507473255124, "grad_norm": 1.6171875, "learning_rate": 6.886540663195218e-05, "loss": 6.9628, "step": 13780 }, { "epoch": 0.7917248770405527, "grad_norm": 1.5625, "learning_rate": 6.882234487619149e-05, "loss": 6.921, "step": 13790 }, { "epoch": 0.792299006755593, "grad_norm": 1.53125, "learning_rate": 6.877926684814853e-05, "loss": 6.901, "step": 13800 }, { "epoch": 0.7928731364706333, "grad_norm": 1.5625, "learning_rate": 6.873617258506504e-05, "loss": 6.9464, "step": 13810 }, { "epoch": 0.7934472661856735, "grad_norm": 1.5703125, "learning_rate": 6.86930621241969e-05, "loss": 6.9005, "step": 13820 }, { "epoch": 0.7940213959007139, "grad_norm": 1.5546875, "learning_rate": 6.864993550281393e-05, "loss": 6.9325, "step": 13830 }, { "epoch": 0.7945955256157541, "grad_norm": 1.6171875, "learning_rate": 6.86067927581999e-05, "loss": 6.9075, "step": 13840 }, { "epoch": 0.7951696553307944, "grad_norm": 1.5625, "learning_rate": 6.856363392765257e-05, "loss": 6.8993, "step": 13850 }, { "epoch": 0.7957437850458347, "grad_norm": 1.5859375, "learning_rate": 6.85204590484836e-05, "loss": 6.9175, "step": 13860 }, { "epoch": 0.796317914760875, "grad_norm": 1.6484375, "learning_rate": 6.84772681580185e-05, "loss": 6.9469, "step": 13870 }, { "epoch": 0.7968920444759152, "grad_norm": 1.546875, "learning_rate": 6.843406129359661e-05, "loss": 6.9306, "step": 13880 }, { "epoch": 0.7974661741909556, "grad_norm": 1.515625, "learning_rate": 6.839083849257113e-05, "loss": 6.925, "step": 13890 }, { "epoch": 0.7980403039059958, "grad_norm": 1.609375, "learning_rate": 6.8347599792309e-05, "loss": 6.9039, "step": 13900 }, { "epoch": 0.7986144336210361, "grad_norm": 1.5546875, "learning_rate": 6.830434523019091e-05, "loss": 6.9328, "step": 13910 }, { "epoch": 0.7991885633360764, "grad_norm": 1.640625, "learning_rate": 6.826107484361129e-05, "loss": 6.9284, "step": 13920 }, { "epoch": 0.7997626930511167, "grad_norm": 1.5703125, "learning_rate": 6.821778866997822e-05, "loss": 6.9301, "step": 13930 }, { "epoch": 0.8003368227661569, "grad_norm": 1.59375, "learning_rate": 6.817448674671341e-05, "loss": 6.9278, "step": 13940 }, { "epoch": 0.8009109524811973, "grad_norm": 1.5390625, "learning_rate": 6.813116911125225e-05, "loss": 6.9447, "step": 13950 }, { "epoch": 0.8014850821962375, "grad_norm": 1.59375, "learning_rate": 6.808783580104365e-05, "loss": 6.9523, "step": 13960 }, { "epoch": 0.8020592119112778, "grad_norm": 1.578125, "learning_rate": 6.804448685355011e-05, "loss": 6.9536, "step": 13970 }, { "epoch": 0.802633341626318, "grad_norm": 1.53125, "learning_rate": 6.800112230624764e-05, "loss": 6.9397, "step": 13980 }, { "epoch": 0.8032074713413584, "grad_norm": 1.578125, "learning_rate": 6.795774219662569e-05, "loss": 6.9319, "step": 13990 }, { "epoch": 0.8037816010563986, "grad_norm": 1.515625, "learning_rate": 6.791434656218729e-05, "loss": 7.0099, "step": 14000 }, { "epoch": 0.804355730771439, "grad_norm": 1.65625, "learning_rate": 6.787093544044873e-05, "loss": 6.9383, "step": 14010 }, { "epoch": 0.8049298604864793, "grad_norm": 1.5859375, "learning_rate": 6.782750886893981e-05, "loss": 6.952, "step": 14020 }, { "epoch": 0.8055039902015195, "grad_norm": 1.546875, "learning_rate": 6.778406688520362e-05, "loss": 6.9498, "step": 14030 }, { "epoch": 0.8060781199165599, "grad_norm": 1.609375, "learning_rate": 6.774060952679661e-05, "loss": 6.9687, "step": 14040 }, { "epoch": 0.8066522496316001, "grad_norm": 1.484375, "learning_rate": 6.769713683128851e-05, "loss": 6.9473, "step": 14050 }, { "epoch": 0.8072263793466404, "grad_norm": 1.625, "learning_rate": 6.76536488362623e-05, "loss": 6.924, "step": 14060 }, { "epoch": 0.8078005090616807, "grad_norm": 1.6953125, "learning_rate": 6.761014557931421e-05, "loss": 6.9409, "step": 14070 }, { "epoch": 0.808374638776721, "grad_norm": 1.65625, "learning_rate": 6.756662709805363e-05, "loss": 6.9435, "step": 14080 }, { "epoch": 0.8089487684917612, "grad_norm": 1.609375, "learning_rate": 6.752309343010316e-05, "loss": 6.9117, "step": 14090 }, { "epoch": 0.8095228982068016, "grad_norm": 1.6015625, "learning_rate": 6.747954461309847e-05, "loss": 6.9805, "step": 14100 }, { "epoch": 0.8100970279218418, "grad_norm": 1.625, "learning_rate": 6.743598068468837e-05, "loss": 6.9137, "step": 14110 }, { "epoch": 0.8106711576368821, "grad_norm": 1.59375, "learning_rate": 6.739240168253471e-05, "loss": 6.9724, "step": 14120 }, { "epoch": 0.8112452873519224, "grad_norm": 1.6015625, "learning_rate": 6.734880764431242e-05, "loss": 6.8846, "step": 14130 }, { "epoch": 0.8118194170669627, "grad_norm": 1.6015625, "learning_rate": 6.730519860770935e-05, "loss": 6.9561, "step": 14140 }, { "epoch": 0.8123935467820029, "grad_norm": 1.53125, "learning_rate": 6.726157461042637e-05, "loss": 6.9365, "step": 14150 }, { "epoch": 0.8129676764970433, "grad_norm": 1.5703125, "learning_rate": 6.721793569017727e-05, "loss": 6.8861, "step": 14160 }, { "epoch": 0.8135418062120835, "grad_norm": 1.546875, "learning_rate": 6.717428188468875e-05, "loss": 6.9391, "step": 14170 }, { "epoch": 0.8141159359271238, "grad_norm": 1.640625, "learning_rate": 6.713061323170038e-05, "loss": 6.9643, "step": 14180 }, { "epoch": 0.8146900656421641, "grad_norm": 1.5859375, "learning_rate": 6.708692976896454e-05, "loss": 6.9478, "step": 14190 }, { "epoch": 0.8152641953572044, "grad_norm": 1.609375, "learning_rate": 6.704323153424643e-05, "loss": 6.8882, "step": 14200 }, { "epoch": 0.8158383250722446, "grad_norm": 1.734375, "learning_rate": 6.699951856532405e-05, "loss": 6.9549, "step": 14210 }, { "epoch": 0.816412454787285, "grad_norm": 1.5625, "learning_rate": 6.695579089998808e-05, "loss": 6.9198, "step": 14220 }, { "epoch": 0.8169865845023252, "grad_norm": 1.5546875, "learning_rate": 6.691204857604195e-05, "loss": 6.9332, "step": 14230 }, { "epoch": 0.8175607142173655, "grad_norm": 1.6171875, "learning_rate": 6.686829163130173e-05, "loss": 6.968, "step": 14240 }, { "epoch": 0.8181348439324058, "grad_norm": 1.7421875, "learning_rate": 6.682452010359616e-05, "loss": 6.9519, "step": 14250 }, { "epoch": 0.8187089736474461, "grad_norm": 1.71875, "learning_rate": 6.678073403076658e-05, "loss": 6.9242, "step": 14260 }, { "epoch": 0.8192831033624863, "grad_norm": 1.6328125, "learning_rate": 6.673693345066691e-05, "loss": 6.961, "step": 14270 }, { "epoch": 0.8198572330775267, "grad_norm": 1.59375, "learning_rate": 6.669311840116357e-05, "loss": 6.9111, "step": 14280 }, { "epoch": 0.8204313627925669, "grad_norm": 1.5859375, "learning_rate": 6.664928892013553e-05, "loss": 6.9245, "step": 14290 }, { "epoch": 0.8210054925076072, "grad_norm": 1.546875, "learning_rate": 6.660544504547423e-05, "loss": 6.9198, "step": 14300 }, { "epoch": 0.8215796222226475, "grad_norm": 1.5859375, "learning_rate": 6.656158681508357e-05, "loss": 6.9587, "step": 14310 }, { "epoch": 0.8221537519376878, "grad_norm": 1.6171875, "learning_rate": 6.651771426687983e-05, "loss": 6.9579, "step": 14320 }, { "epoch": 0.822727881652728, "grad_norm": 1.6015625, "learning_rate": 6.647382743879166e-05, "loss": 6.9472, "step": 14330 }, { "epoch": 0.8233020113677684, "grad_norm": 1.5390625, "learning_rate": 6.642992636876007e-05, "loss": 6.9644, "step": 14340 }, { "epoch": 0.8238761410828086, "grad_norm": 1.6015625, "learning_rate": 6.638601109473842e-05, "loss": 6.9212, "step": 14350 }, { "epoch": 0.8244502707978489, "grad_norm": 1.609375, "learning_rate": 6.634208165469231e-05, "loss": 6.9237, "step": 14360 }, { "epoch": 0.8250244005128892, "grad_norm": 1.625, "learning_rate": 6.629813808659958e-05, "loss": 6.9283, "step": 14370 }, { "epoch": 0.8255985302279295, "grad_norm": 1.578125, "learning_rate": 6.625418042845028e-05, "loss": 6.9112, "step": 14380 }, { "epoch": 0.8261726599429697, "grad_norm": 1.6328125, "learning_rate": 6.621020871824668e-05, "loss": 6.9496, "step": 14390 }, { "epoch": 0.8267467896580101, "grad_norm": 1.65625, "learning_rate": 6.616622299400319e-05, "loss": 6.9288, "step": 14400 }, { "epoch": 0.8273209193730503, "grad_norm": 1.5, "learning_rate": 6.612222329374631e-05, "loss": 6.923, "step": 14410 }, { "epoch": 0.8278950490880906, "grad_norm": 1.6953125, "learning_rate": 6.607820965551462e-05, "loss": 6.9171, "step": 14420 }, { "epoch": 0.8284691788031309, "grad_norm": 1.6484375, "learning_rate": 6.603418211735876e-05, "loss": 6.9524, "step": 14430 }, { "epoch": 0.8290433085181712, "grad_norm": 1.65625, "learning_rate": 6.599014071734145e-05, "loss": 6.9356, "step": 14440 }, { "epoch": 0.8296174382332114, "grad_norm": 1.53125, "learning_rate": 6.594608549353725e-05, "loss": 6.8981, "step": 14450 }, { "epoch": 0.8301915679482518, "grad_norm": 1.6640625, "learning_rate": 6.59020164840328e-05, "loss": 6.9348, "step": 14460 }, { "epoch": 0.8307656976632921, "grad_norm": 1.6328125, "learning_rate": 6.585793372692663e-05, "loss": 6.9244, "step": 14470 }, { "epoch": 0.8313398273783323, "grad_norm": 1.59375, "learning_rate": 6.581383726032912e-05, "loss": 6.9678, "step": 14480 }, { "epoch": 0.8319139570933727, "grad_norm": 1.6015625, "learning_rate": 6.57697271223625e-05, "loss": 6.9098, "step": 14490 }, { "epoch": 0.8324880868084129, "grad_norm": 1.6796875, "learning_rate": 6.572560335116087e-05, "loss": 6.9442, "step": 14500 }, { "epoch": 0.8330622165234532, "grad_norm": 1.515625, "learning_rate": 6.568146598487007e-05, "loss": 6.9094, "step": 14510 }, { "epoch": 0.8336363462384935, "grad_norm": 1.640625, "learning_rate": 6.563731506164772e-05, "loss": 6.9335, "step": 14520 }, { "epoch": 0.8342104759535338, "grad_norm": 1.6015625, "learning_rate": 6.559315061966314e-05, "loss": 6.9398, "step": 14530 }, { "epoch": 0.834784605668574, "grad_norm": 1.5234375, "learning_rate": 6.554897269709735e-05, "loss": 6.9264, "step": 14540 }, { "epoch": 0.8353587353836144, "grad_norm": 1.5703125, "learning_rate": 6.550478133214304e-05, "loss": 6.9198, "step": 14550 }, { "epoch": 0.8359328650986546, "grad_norm": 1.515625, "learning_rate": 6.546057656300447e-05, "loss": 6.9202, "step": 14560 }, { "epoch": 0.836506994813695, "grad_norm": 1.6171875, "learning_rate": 6.541635842789752e-05, "loss": 6.9135, "step": 14570 }, { "epoch": 0.8370811245287352, "grad_norm": 1.609375, "learning_rate": 6.537212696504968e-05, "loss": 6.9019, "step": 14580 }, { "epoch": 0.8376552542437755, "grad_norm": 1.6171875, "learning_rate": 6.532788221269985e-05, "loss": 6.891, "step": 14590 }, { "epoch": 0.8382293839588157, "grad_norm": 1.6484375, "learning_rate": 6.528362420909848e-05, "loss": 6.9124, "step": 14600 }, { "epoch": 0.8388035136738561, "grad_norm": 1.59375, "learning_rate": 6.52393529925075e-05, "loss": 6.9593, "step": 14610 }, { "epoch": 0.8393776433888963, "grad_norm": 1.640625, "learning_rate": 6.519506860120024e-05, "loss": 6.9323, "step": 14620 }, { "epoch": 0.8399517731039366, "grad_norm": 1.59375, "learning_rate": 6.515077107346139e-05, "loss": 6.9259, "step": 14630 }, { "epoch": 0.8405259028189769, "grad_norm": 1.6171875, "learning_rate": 6.5106460447587e-05, "loss": 6.9423, "step": 14640 }, { "epoch": 0.8411000325340172, "grad_norm": 1.7421875, "learning_rate": 6.506213676188453e-05, "loss": 6.9264, "step": 14650 }, { "epoch": 0.8416741622490574, "grad_norm": 1.6484375, "learning_rate": 6.501780005467262e-05, "loss": 6.9357, "step": 14660 }, { "epoch": 0.8422482919640978, "grad_norm": 1.6640625, "learning_rate": 6.497345036428124e-05, "loss": 6.9022, "step": 14670 }, { "epoch": 0.842822421679138, "grad_norm": 1.625, "learning_rate": 6.492908772905154e-05, "loss": 6.9009, "step": 14680 }, { "epoch": 0.8433965513941784, "grad_norm": 1.6640625, "learning_rate": 6.488471218733588e-05, "loss": 6.8893, "step": 14690 }, { "epoch": 0.8439706811092186, "grad_norm": 1.5859375, "learning_rate": 6.484032377749777e-05, "loss": 6.9118, "step": 14700 }, { "epoch": 0.8445448108242589, "grad_norm": 1.5859375, "learning_rate": 6.479592253791187e-05, "loss": 6.8947, "step": 14710 }, { "epoch": 0.8451189405392991, "grad_norm": 1.578125, "learning_rate": 6.47515085069639e-05, "loss": 6.9224, "step": 14720 }, { "epoch": 0.8456930702543395, "grad_norm": 1.5703125, "learning_rate": 6.470708172305065e-05, "loss": 6.9317, "step": 14730 }, { "epoch": 0.8462671999693797, "grad_norm": 1.4921875, "learning_rate": 6.466264222457997e-05, "loss": 6.9228, "step": 14740 }, { "epoch": 0.84684132968442, "grad_norm": 1.515625, "learning_rate": 6.46181900499706e-05, "loss": 6.9008, "step": 14750 }, { "epoch": 0.8474154593994603, "grad_norm": 1.609375, "learning_rate": 6.457372523765238e-05, "loss": 6.928, "step": 14760 }, { "epoch": 0.8479895891145006, "grad_norm": 1.640625, "learning_rate": 6.452924782606595e-05, "loss": 6.9059, "step": 14770 }, { "epoch": 0.8485637188295408, "grad_norm": 1.5078125, "learning_rate": 6.448475785366291e-05, "loss": 6.9193, "step": 14780 }, { "epoch": 0.8491378485445812, "grad_norm": 1.5703125, "learning_rate": 6.444025535890573e-05, "loss": 6.904, "step": 14790 }, { "epoch": 0.8497119782596214, "grad_norm": 1.5859375, "learning_rate": 6.439574038026766e-05, "loss": 6.9381, "step": 14800 }, { "epoch": 0.8502861079746618, "grad_norm": 1.5546875, "learning_rate": 6.435121295623276e-05, "loss": 6.9774, "step": 14810 }, { "epoch": 0.850860237689702, "grad_norm": 1.53125, "learning_rate": 6.430667312529585e-05, "loss": 6.9291, "step": 14820 }, { "epoch": 0.8514343674047423, "grad_norm": 1.578125, "learning_rate": 6.426212092596248e-05, "loss": 6.9351, "step": 14830 }, { "epoch": 0.8520084971197825, "grad_norm": 1.6875, "learning_rate": 6.421755639674889e-05, "loss": 6.9359, "step": 14840 }, { "epoch": 0.8525826268348229, "grad_norm": 1.640625, "learning_rate": 6.4172979576182e-05, "loss": 6.8811, "step": 14850 }, { "epoch": 0.8531567565498631, "grad_norm": 1.671875, "learning_rate": 6.412839050279929e-05, "loss": 6.9069, "step": 14860 }, { "epoch": 0.8537308862649035, "grad_norm": 1.671875, "learning_rate": 6.408378921514894e-05, "loss": 6.936, "step": 14870 }, { "epoch": 0.8543050159799437, "grad_norm": 1.7578125, "learning_rate": 6.403917575178959e-05, "loss": 6.9129, "step": 14880 }, { "epoch": 0.854879145694984, "grad_norm": 1.46875, "learning_rate": 6.399455015129043e-05, "loss": 6.912, "step": 14890 }, { "epoch": 0.8554532754100242, "grad_norm": 1.609375, "learning_rate": 6.394991245223121e-05, "loss": 6.9321, "step": 14900 }, { "epoch": 0.8560274051250646, "grad_norm": 1.65625, "learning_rate": 6.390526269320202e-05, "loss": 6.8809, "step": 14910 }, { "epoch": 0.8566015348401049, "grad_norm": 1.5703125, "learning_rate": 6.38606009128035e-05, "loss": 6.9148, "step": 14920 }, { "epoch": 0.8571756645551452, "grad_norm": 1.625, "learning_rate": 6.38159271496466e-05, "loss": 6.9322, "step": 14930 }, { "epoch": 0.8577497942701855, "grad_norm": 1.6171875, "learning_rate": 6.377124144235265e-05, "loss": 6.9338, "step": 14940 }, { "epoch": 0.8583239239852257, "grad_norm": 1.5859375, "learning_rate": 6.372654382955334e-05, "loss": 6.8938, "step": 14950 }, { "epoch": 0.8588980537002661, "grad_norm": 1.6328125, "learning_rate": 6.368183434989058e-05, "loss": 6.9226, "step": 14960 }, { "epoch": 0.8594721834153063, "grad_norm": 1.5546875, "learning_rate": 6.363711304201661e-05, "loss": 6.9437, "step": 14970 }, { "epoch": 0.8600463131303466, "grad_norm": 1.59375, "learning_rate": 6.359237994459388e-05, "loss": 6.9135, "step": 14980 }, { "epoch": 0.8606204428453869, "grad_norm": 1.6328125, "learning_rate": 6.354763509629498e-05, "loss": 6.9327, "step": 14990 }, { "epoch": 0.8611945725604272, "grad_norm": 1.6328125, "learning_rate": 6.350287853580273e-05, "loss": 6.8962, "step": 15000 }, { "epoch": 0.8617687022754674, "grad_norm": 1.546875, "learning_rate": 6.345811030181005e-05, "loss": 6.9345, "step": 15010 }, { "epoch": 0.8623428319905078, "grad_norm": 1.6015625, "learning_rate": 6.341333043301993e-05, "loss": 6.9071, "step": 15020 }, { "epoch": 0.862916961705548, "grad_norm": 1.890625, "learning_rate": 6.336853896814543e-05, "loss": 6.9159, "step": 15030 }, { "epoch": 0.8634910914205883, "grad_norm": 1.625, "learning_rate": 6.332373594590964e-05, "loss": 6.9337, "step": 15040 }, { "epoch": 0.8640652211356286, "grad_norm": 1.6484375, "learning_rate": 6.327892140504567e-05, "loss": 6.9085, "step": 15050 }, { "epoch": 0.8646393508506689, "grad_norm": 1.5234375, "learning_rate": 6.323409538429656e-05, "loss": 6.9236, "step": 15060 }, { "epoch": 0.8652134805657091, "grad_norm": 1.5625, "learning_rate": 6.318925792241523e-05, "loss": 6.9405, "step": 15070 }, { "epoch": 0.8657876102807495, "grad_norm": 1.625, "learning_rate": 6.314440905816457e-05, "loss": 6.9579, "step": 15080 }, { "epoch": 0.8663617399957897, "grad_norm": 1.578125, "learning_rate": 6.30995488303173e-05, "loss": 6.9024, "step": 15090 }, { "epoch": 0.86693586971083, "grad_norm": 1.578125, "learning_rate": 6.305467727765592e-05, "loss": 6.9078, "step": 15100 }, { "epoch": 0.8675099994258703, "grad_norm": 1.5703125, "learning_rate": 6.30097944389728e-05, "loss": 6.9246, "step": 15110 }, { "epoch": 0.8680841291409106, "grad_norm": 1.6875, "learning_rate": 6.296490035306999e-05, "loss": 6.9051, "step": 15120 }, { "epoch": 0.8686582588559508, "grad_norm": 1.5390625, "learning_rate": 6.291999505875932e-05, "loss": 6.9181, "step": 15130 }, { "epoch": 0.8692323885709912, "grad_norm": 1.5625, "learning_rate": 6.287507859486228e-05, "loss": 6.9262, "step": 15140 }, { "epoch": 0.8698065182860314, "grad_norm": 1.5625, "learning_rate": 6.283015100021002e-05, "loss": 6.888, "step": 15150 }, { "epoch": 0.8703806480010717, "grad_norm": 1.6484375, "learning_rate": 6.278521231364334e-05, "loss": 6.9464, "step": 15160 }, { "epoch": 0.870954777716112, "grad_norm": 1.59375, "learning_rate": 6.274026257401258e-05, "loss": 6.9193, "step": 15170 }, { "epoch": 0.8715289074311523, "grad_norm": 1.6796875, "learning_rate": 6.269530182017766e-05, "loss": 6.8934, "step": 15180 }, { "epoch": 0.8721030371461925, "grad_norm": 1.5, "learning_rate": 6.265033009100805e-05, "loss": 6.9851, "step": 15190 }, { "epoch": 0.8726771668612329, "grad_norm": 1.546875, "learning_rate": 6.260534742538267e-05, "loss": 6.9182, "step": 15200 }, { "epoch": 0.8732512965762731, "grad_norm": 1.625, "learning_rate": 6.256035386218989e-05, "loss": 6.9386, "step": 15210 }, { "epoch": 0.8738254262913134, "grad_norm": 1.640625, "learning_rate": 6.251534944032754e-05, "loss": 6.9668, "step": 15220 }, { "epoch": 0.8743995560063537, "grad_norm": 1.6328125, "learning_rate": 6.247033419870281e-05, "loss": 6.9103, "step": 15230 }, { "epoch": 0.874973685721394, "grad_norm": 1.625, "learning_rate": 6.242530817623225e-05, "loss": 6.8926, "step": 15240 }, { "epoch": 0.8755478154364342, "grad_norm": 1.5625, "learning_rate": 6.238027141184171e-05, "loss": 6.9194, "step": 15250 }, { "epoch": 0.8761219451514746, "grad_norm": 1.5859375, "learning_rate": 6.23352239444664e-05, "loss": 6.8804, "step": 15260 }, { "epoch": 0.8766960748665148, "grad_norm": 1.5703125, "learning_rate": 6.229016581305067e-05, "loss": 6.9138, "step": 15270 }, { "epoch": 0.8772702045815551, "grad_norm": 1.6328125, "learning_rate": 6.224509705654818e-05, "loss": 6.8961, "step": 15280 }, { "epoch": 0.8778443342965954, "grad_norm": 1.5625, "learning_rate": 6.220001771392173e-05, "loss": 6.9099, "step": 15290 }, { "epoch": 0.8784184640116357, "grad_norm": 1.6171875, "learning_rate": 6.21549278241433e-05, "loss": 6.9214, "step": 15300 }, { "epoch": 0.8789925937266759, "grad_norm": 1.6484375, "learning_rate": 6.210982742619395e-05, "loss": 6.8928, "step": 15310 }, { "epoch": 0.8795667234417163, "grad_norm": 1.5859375, "learning_rate": 6.206471655906388e-05, "loss": 6.9285, "step": 15320 }, { "epoch": 0.8801408531567565, "grad_norm": 1.578125, "learning_rate": 6.20195952617523e-05, "loss": 6.9484, "step": 15330 }, { "epoch": 0.8807149828717968, "grad_norm": 1.6640625, "learning_rate": 6.197446357326745e-05, "loss": 6.9297, "step": 15340 }, { "epoch": 0.8812891125868371, "grad_norm": 1.625, "learning_rate": 6.192932153262653e-05, "loss": 6.8879, "step": 15350 }, { "epoch": 0.8818632423018774, "grad_norm": 1.53125, "learning_rate": 6.188416917885572e-05, "loss": 6.8707, "step": 15360 }, { "epoch": 0.8824373720169177, "grad_norm": 1.5390625, "learning_rate": 6.183900655099013e-05, "loss": 6.9316, "step": 15370 }, { "epoch": 0.883011501731958, "grad_norm": 1.5703125, "learning_rate": 6.179383368807369e-05, "loss": 6.9382, "step": 15380 }, { "epoch": 0.8835856314469983, "grad_norm": 1.5390625, "learning_rate": 6.174865062915924e-05, "loss": 6.9445, "step": 15390 }, { "epoch": 0.8841597611620385, "grad_norm": 1.5703125, "learning_rate": 6.170345741330839e-05, "loss": 6.8567, "step": 15400 }, { "epoch": 0.8847338908770789, "grad_norm": 1.6953125, "learning_rate": 6.165825407959158e-05, "loss": 6.9408, "step": 15410 }, { "epoch": 0.8853080205921191, "grad_norm": 1.5546875, "learning_rate": 6.161304066708796e-05, "loss": 6.9133, "step": 15420 }, { "epoch": 0.8858821503071594, "grad_norm": 1.6328125, "learning_rate": 6.156781721488538e-05, "loss": 6.8569, "step": 15430 }, { "epoch": 0.8864562800221997, "grad_norm": 1.5859375, "learning_rate": 6.152258376208042e-05, "loss": 6.8869, "step": 15440 }, { "epoch": 0.88703040973724, "grad_norm": 1.671875, "learning_rate": 6.147734034777828e-05, "loss": 6.9566, "step": 15450 }, { "epoch": 0.8876045394522802, "grad_norm": 1.578125, "learning_rate": 6.143208701109274e-05, "loss": 6.8694, "step": 15460 }, { "epoch": 0.8881786691673206, "grad_norm": 1.6328125, "learning_rate": 6.13868237911462e-05, "loss": 6.9282, "step": 15470 }, { "epoch": 0.8887527988823608, "grad_norm": 1.59375, "learning_rate": 6.13415507270696e-05, "loss": 6.9229, "step": 15480 }, { "epoch": 0.8893269285974011, "grad_norm": 1.6171875, "learning_rate": 6.129626785800237e-05, "loss": 6.9499, "step": 15490 }, { "epoch": 0.8899010583124414, "grad_norm": 1.640625, "learning_rate": 6.125097522309245e-05, "loss": 6.9157, "step": 15500 }, { "epoch": 0.8904751880274817, "grad_norm": 1.5703125, "learning_rate": 6.120567286149618e-05, "loss": 6.9177, "step": 15510 }, { "epoch": 0.8910493177425219, "grad_norm": 1.6953125, "learning_rate": 6.116036081237834e-05, "loss": 6.8992, "step": 15520 }, { "epoch": 0.8916234474575623, "grad_norm": 1.5546875, "learning_rate": 6.111503911491207e-05, "loss": 6.8907, "step": 15530 }, { "epoch": 0.8921975771726025, "grad_norm": 1.53125, "learning_rate": 6.106970780827886e-05, "loss": 6.8982, "step": 15540 }, { "epoch": 0.8927717068876428, "grad_norm": 1.59375, "learning_rate": 6.102436693166852e-05, "loss": 6.9049, "step": 15550 }, { "epoch": 0.8933458366026831, "grad_norm": 1.546875, "learning_rate": 6.0979016524279077e-05, "loss": 6.9312, "step": 15560 }, { "epoch": 0.8939199663177234, "grad_norm": 1.515625, "learning_rate": 6.093365662531686e-05, "loss": 6.8921, "step": 15570 }, { "epoch": 0.8944940960327636, "grad_norm": 1.59375, "learning_rate": 6.0888287273996404e-05, "loss": 6.9304, "step": 15580 }, { "epoch": 0.895068225747804, "grad_norm": 1.53125, "learning_rate": 6.084290850954036e-05, "loss": 6.921, "step": 15590 }, { "epoch": 0.8956423554628442, "grad_norm": 1.6171875, "learning_rate": 6.0797520371179585e-05, "loss": 6.9617, "step": 15600 }, { "epoch": 0.8962164851778845, "grad_norm": 1.546875, "learning_rate": 6.0752122898152955e-05, "loss": 6.9234, "step": 15610 }, { "epoch": 0.8967906148929248, "grad_norm": 1.6015625, "learning_rate": 6.070671612970751e-05, "loss": 6.9161, "step": 15620 }, { "epoch": 0.8973647446079651, "grad_norm": 1.59375, "learning_rate": 6.066130010509827e-05, "loss": 6.9208, "step": 15630 }, { "epoch": 0.8979388743230053, "grad_norm": 1.5703125, "learning_rate": 6.061587486358826e-05, "loss": 6.9033, "step": 15640 }, { "epoch": 0.8985130040380457, "grad_norm": 1.5625, "learning_rate": 6.057044044444848e-05, "loss": 6.9014, "step": 15650 }, { "epoch": 0.8990871337530859, "grad_norm": 1.65625, "learning_rate": 6.0524996886957896e-05, "loss": 6.8919, "step": 15660 }, { "epoch": 0.8996612634681262, "grad_norm": 1.640625, "learning_rate": 6.047954423040332e-05, "loss": 6.9082, "step": 15670 }, { "epoch": 0.9002353931831665, "grad_norm": 1.625, "learning_rate": 6.043408251407945e-05, "loss": 6.9212, "step": 15680 }, { "epoch": 0.9008095228982068, "grad_norm": 1.578125, "learning_rate": 6.038861177728884e-05, "loss": 6.9136, "step": 15690 }, { "epoch": 0.901383652613247, "grad_norm": 1.5859375, "learning_rate": 6.03431320593418e-05, "loss": 6.9018, "step": 15700 }, { "epoch": 0.9019577823282874, "grad_norm": 1.7109375, "learning_rate": 6.029764339955646e-05, "loss": 6.9162, "step": 15710 }, { "epoch": 0.9025319120433276, "grad_norm": 1.546875, "learning_rate": 6.0252145837258636e-05, "loss": 6.9063, "step": 15720 }, { "epoch": 0.903106041758368, "grad_norm": 1.6640625, "learning_rate": 6.020663941178184e-05, "loss": 6.8981, "step": 15730 }, { "epoch": 0.9036801714734082, "grad_norm": 1.6484375, "learning_rate": 6.016112416246726e-05, "loss": 6.8945, "step": 15740 }, { "epoch": 0.9042543011884485, "grad_norm": 1.578125, "learning_rate": 6.011560012866375e-05, "loss": 6.9036, "step": 15750 }, { "epoch": 0.9048284309034887, "grad_norm": 1.65625, "learning_rate": 6.0070067349727675e-05, "loss": 6.9011, "step": 15760 }, { "epoch": 0.9054025606185291, "grad_norm": 1.625, "learning_rate": 6.002452586502303e-05, "loss": 6.9039, "step": 15770 }, { "epoch": 0.9059766903335693, "grad_norm": 1.5078125, "learning_rate": 5.9978975713921294e-05, "loss": 6.9252, "step": 15780 }, { "epoch": 0.9065508200486097, "grad_norm": 1.609375, "learning_rate": 5.9933416935801466e-05, "loss": 6.9102, "step": 15790 }, { "epoch": 0.9071249497636499, "grad_norm": 1.53125, "learning_rate": 5.9887849570050016e-05, "loss": 6.9262, "step": 15800 }, { "epoch": 0.9076990794786902, "grad_norm": 1.6875, "learning_rate": 5.984227365606078e-05, "loss": 6.877, "step": 15810 }, { "epoch": 0.9082732091937306, "grad_norm": 1.703125, "learning_rate": 5.979668923323503e-05, "loss": 6.9007, "step": 15820 }, { "epoch": 0.9088473389087708, "grad_norm": 1.609375, "learning_rate": 5.9751096340981373e-05, "loss": 6.9126, "step": 15830 }, { "epoch": 0.9094214686238111, "grad_norm": 1.7265625, "learning_rate": 5.970549501871578e-05, "loss": 6.8955, "step": 15840 }, { "epoch": 0.9099955983388514, "grad_norm": 1.515625, "learning_rate": 5.9659885305861476e-05, "loss": 6.9163, "step": 15850 }, { "epoch": 0.9105697280538917, "grad_norm": 1.6171875, "learning_rate": 5.961426724184892e-05, "loss": 6.9319, "step": 15860 }, { "epoch": 0.9111438577689319, "grad_norm": 1.5546875, "learning_rate": 5.956864086611581e-05, "loss": 6.9329, "step": 15870 }, { "epoch": 0.9117179874839723, "grad_norm": 1.765625, "learning_rate": 5.952300621810707e-05, "loss": 6.9002, "step": 15880 }, { "epoch": 0.9122921171990125, "grad_norm": 1.515625, "learning_rate": 5.9477363337274696e-05, "loss": 6.8949, "step": 15890 }, { "epoch": 0.9128662469140528, "grad_norm": 1.703125, "learning_rate": 5.9431712263077864e-05, "loss": 6.9267, "step": 15900 }, { "epoch": 0.913440376629093, "grad_norm": 1.6328125, "learning_rate": 5.938605303498282e-05, "loss": 6.9459, "step": 15910 }, { "epoch": 0.9140145063441334, "grad_norm": 1.5703125, "learning_rate": 5.934038569246283e-05, "loss": 6.9093, "step": 15920 }, { "epoch": 0.9145886360591736, "grad_norm": 1.640625, "learning_rate": 5.929471027499822e-05, "loss": 6.8726, "step": 15930 }, { "epoch": 0.915162765774214, "grad_norm": 1.5859375, "learning_rate": 5.924902682207627e-05, "loss": 6.8642, "step": 15940 }, { "epoch": 0.9157368954892542, "grad_norm": 1.6484375, "learning_rate": 5.9203335373191195e-05, "loss": 6.9166, "step": 15950 }, { "epoch": 0.9163110252042945, "grad_norm": 1.6171875, "learning_rate": 5.915763596784415e-05, "loss": 6.9005, "step": 15960 }, { "epoch": 0.9168851549193348, "grad_norm": 1.65625, "learning_rate": 5.911192864554317e-05, "loss": 6.912, "step": 15970 }, { "epoch": 0.9174592846343751, "grad_norm": 1.59375, "learning_rate": 5.90662134458031e-05, "loss": 6.9025, "step": 15980 }, { "epoch": 0.9180334143494153, "grad_norm": 1.5703125, "learning_rate": 5.902049040814563e-05, "loss": 6.9095, "step": 15990 }, { "epoch": 0.9186075440644557, "grad_norm": 1.6796875, "learning_rate": 5.89747595720992e-05, "loss": 6.8551, "step": 16000 }, { "epoch": 0.9191816737794959, "grad_norm": 1.5625, "learning_rate": 5.8929020977199034e-05, "loss": 6.9176, "step": 16010 }, { "epoch": 0.9197558034945362, "grad_norm": 1.59375, "learning_rate": 5.888327466298701e-05, "loss": 6.9277, "step": 16020 }, { "epoch": 0.9203299332095765, "grad_norm": 1.6875, "learning_rate": 5.88375206690117e-05, "loss": 6.8961, "step": 16030 }, { "epoch": 0.9209040629246168, "grad_norm": 1.6953125, "learning_rate": 5.879175903482833e-05, "loss": 6.9116, "step": 16040 }, { "epoch": 0.921478192639657, "grad_norm": 1.5703125, "learning_rate": 5.874598979999873e-05, "loss": 6.8972, "step": 16050 }, { "epoch": 0.9220523223546974, "grad_norm": 1.578125, "learning_rate": 5.870021300409128e-05, "loss": 6.9153, "step": 16060 }, { "epoch": 0.9226264520697376, "grad_norm": 1.640625, "learning_rate": 5.8654428686680905e-05, "loss": 6.8592, "step": 16070 }, { "epoch": 0.9232005817847779, "grad_norm": 1.625, "learning_rate": 5.860863688734903e-05, "loss": 6.9135, "step": 16080 }, { "epoch": 0.9237747114998182, "grad_norm": 1.5078125, "learning_rate": 5.8562837645683575e-05, "loss": 6.9168, "step": 16090 }, { "epoch": 0.9243488412148585, "grad_norm": 1.5234375, "learning_rate": 5.851703100127886e-05, "loss": 6.8931, "step": 16100 }, { "epoch": 0.9249229709298987, "grad_norm": 1.609375, "learning_rate": 5.8471216993735626e-05, "loss": 6.9196, "step": 16110 }, { "epoch": 0.9254971006449391, "grad_norm": 1.5625, "learning_rate": 5.842539566266095e-05, "loss": 6.8941, "step": 16120 }, { "epoch": 0.9260712303599793, "grad_norm": 1.5859375, "learning_rate": 5.837956704766829e-05, "loss": 6.9126, "step": 16130 }, { "epoch": 0.9266453600750196, "grad_norm": 1.578125, "learning_rate": 5.833373118837734e-05, "loss": 6.9539, "step": 16140 }, { "epoch": 0.9272194897900599, "grad_norm": 1.6015625, "learning_rate": 5.8287888124414126e-05, "loss": 6.8976, "step": 16150 }, { "epoch": 0.9277936195051002, "grad_norm": 1.6875, "learning_rate": 5.824203789541085e-05, "loss": 6.8622, "step": 16160 }, { "epoch": 0.9283677492201404, "grad_norm": 1.5625, "learning_rate": 5.819618054100591e-05, "loss": 6.9183, "step": 16170 }, { "epoch": 0.9289418789351808, "grad_norm": 1.6015625, "learning_rate": 5.8150316100843895e-05, "loss": 6.9262, "step": 16180 }, { "epoch": 0.929516008650221, "grad_norm": 1.6328125, "learning_rate": 5.81044446145755e-05, "loss": 6.9072, "step": 16190 }, { "epoch": 0.9300901383652613, "grad_norm": 1.609375, "learning_rate": 5.8058566121857514e-05, "loss": 6.8663, "step": 16200 }, { "epoch": 0.9306642680803016, "grad_norm": 1.6328125, "learning_rate": 5.8012680662352795e-05, "loss": 6.9084, "step": 16210 }, { "epoch": 0.9312383977953419, "grad_norm": 1.546875, "learning_rate": 5.796678827573018e-05, "loss": 6.8419, "step": 16220 }, { "epoch": 0.9318125275103821, "grad_norm": 1.6328125, "learning_rate": 5.792088900166457e-05, "loss": 6.8997, "step": 16230 }, { "epoch": 0.9323866572254225, "grad_norm": 1.59375, "learning_rate": 5.7874982879836746e-05, "loss": 6.8838, "step": 16240 }, { "epoch": 0.9329607869404627, "grad_norm": 1.65625, "learning_rate": 5.7829069949933464e-05, "loss": 6.9244, "step": 16250 }, { "epoch": 0.933534916655503, "grad_norm": 1.578125, "learning_rate": 5.778315025164731e-05, "loss": 6.9197, "step": 16260 }, { "epoch": 0.9341090463705434, "grad_norm": 1.6640625, "learning_rate": 5.77372238246768e-05, "loss": 6.8825, "step": 16270 }, { "epoch": 0.9346831760855836, "grad_norm": 1.5703125, "learning_rate": 5.769129070872619e-05, "loss": 6.9021, "step": 16280 }, { "epoch": 0.9352573058006239, "grad_norm": 1.6796875, "learning_rate": 5.7645350943505547e-05, "loss": 6.9201, "step": 16290 }, { "epoch": 0.9358314355156642, "grad_norm": 1.5703125, "learning_rate": 5.759940456873071e-05, "loss": 6.8906, "step": 16300 }, { "epoch": 0.9364055652307045, "grad_norm": 1.65625, "learning_rate": 5.755345162412318e-05, "loss": 6.9528, "step": 16310 }, { "epoch": 0.9369796949457447, "grad_norm": 1.609375, "learning_rate": 5.7507492149410204e-05, "loss": 6.8871, "step": 16320 }, { "epoch": 0.9375538246607851, "grad_norm": 1.5390625, "learning_rate": 5.746152618432462e-05, "loss": 6.9257, "step": 16330 }, { "epoch": 0.9381279543758253, "grad_norm": 1.5859375, "learning_rate": 5.7415553768604904e-05, "loss": 6.9333, "step": 16340 }, { "epoch": 0.9387020840908656, "grad_norm": 1.671875, "learning_rate": 5.736957494199509e-05, "loss": 6.9548, "step": 16350 }, { "epoch": 0.9392762138059059, "grad_norm": 1.4765625, "learning_rate": 5.7323589744244765e-05, "loss": 6.9107, "step": 16360 }, { "epoch": 0.9398503435209462, "grad_norm": 1.546875, "learning_rate": 5.727759821510904e-05, "loss": 6.9093, "step": 16370 }, { "epoch": 0.9404244732359864, "grad_norm": 1.5, "learning_rate": 5.72316003943485e-05, "loss": 6.9069, "step": 16380 }, { "epoch": 0.9409986029510268, "grad_norm": 1.609375, "learning_rate": 5.7185596321729106e-05, "loss": 6.9084, "step": 16390 }, { "epoch": 0.941572732666067, "grad_norm": 1.6328125, "learning_rate": 5.71395860370223e-05, "loss": 6.8798, "step": 16400 }, { "epoch": 0.9421468623811073, "grad_norm": 1.609375, "learning_rate": 5.7093569580004855e-05, "loss": 6.9022, "step": 16410 }, { "epoch": 0.9427209920961476, "grad_norm": 1.59375, "learning_rate": 5.704754699045891e-05, "loss": 6.9142, "step": 16420 }, { "epoch": 0.9432951218111879, "grad_norm": 1.5546875, "learning_rate": 5.700151830817187e-05, "loss": 6.9078, "step": 16430 }, { "epoch": 0.9438692515262281, "grad_norm": 1.625, "learning_rate": 5.695548357293642e-05, "loss": 6.966, "step": 16440 }, { "epoch": 0.9444433812412685, "grad_norm": 1.59375, "learning_rate": 5.690944282455049e-05, "loss": 6.8658, "step": 16450 }, { "epoch": 0.9450175109563087, "grad_norm": 1.640625, "learning_rate": 5.6863396102817214e-05, "loss": 6.8941, "step": 16460 }, { "epoch": 0.945591640671349, "grad_norm": 1.6484375, "learning_rate": 5.681734344754486e-05, "loss": 6.895, "step": 16470 }, { "epoch": 0.9461657703863893, "grad_norm": 1.6328125, "learning_rate": 5.677128489854684e-05, "loss": 6.897, "step": 16480 }, { "epoch": 0.9467399001014296, "grad_norm": 1.5234375, "learning_rate": 5.672522049564165e-05, "loss": 6.872, "step": 16490 }, { "epoch": 0.9473140298164698, "grad_norm": 1.640625, "learning_rate": 5.6679150278652895e-05, "loss": 6.9132, "step": 16500 }, { "epoch": 0.9478881595315102, "grad_norm": 1.6171875, "learning_rate": 5.6633074287409135e-05, "loss": 6.9661, "step": 16510 }, { "epoch": 0.9484622892465504, "grad_norm": 1.578125, "learning_rate": 5.6586992561744e-05, "loss": 6.9056, "step": 16520 }, { "epoch": 0.9490364189615907, "grad_norm": 1.640625, "learning_rate": 5.654090514149598e-05, "loss": 6.9056, "step": 16530 }, { "epoch": 0.949610548676631, "grad_norm": 1.5625, "learning_rate": 5.649481206650859e-05, "loss": 6.8932, "step": 16540 }, { "epoch": 0.9501846783916713, "grad_norm": 1.578125, "learning_rate": 5.6448713376630194e-05, "loss": 6.8895, "step": 16550 }, { "epoch": 0.9507588081067115, "grad_norm": 1.5859375, "learning_rate": 5.640260911171397e-05, "loss": 6.9123, "step": 16560 }, { "epoch": 0.9513329378217519, "grad_norm": 1.5546875, "learning_rate": 5.635649931161794e-05, "loss": 6.8596, "step": 16570 }, { "epoch": 0.9519070675367921, "grad_norm": 1.796875, "learning_rate": 5.6310384016204965e-05, "loss": 6.899, "step": 16580 }, { "epoch": 0.9524811972518324, "grad_norm": 1.65625, "learning_rate": 5.6264263265342586e-05, "loss": 6.9051, "step": 16590 }, { "epoch": 0.9530553269668727, "grad_norm": 1.53125, "learning_rate": 5.62181370989031e-05, "loss": 6.8821, "step": 16600 }, { "epoch": 0.953629456681913, "grad_norm": 1.625, "learning_rate": 5.617200555676344e-05, "loss": 6.8929, "step": 16610 }, { "epoch": 0.9542035863969532, "grad_norm": 1.5859375, "learning_rate": 5.612586867880525e-05, "loss": 6.8953, "step": 16620 }, { "epoch": 0.9547777161119936, "grad_norm": 1.5703125, "learning_rate": 5.607972650491476e-05, "loss": 6.9298, "step": 16630 }, { "epoch": 0.9553518458270338, "grad_norm": 1.5390625, "learning_rate": 5.6033579074982766e-05, "loss": 6.8771, "step": 16640 }, { "epoch": 0.9559259755420741, "grad_norm": 1.5625, "learning_rate": 5.598742642890461e-05, "loss": 6.9059, "step": 16650 }, { "epoch": 0.9565001052571144, "grad_norm": 1.5546875, "learning_rate": 5.5941268606580146e-05, "loss": 6.9305, "step": 16660 }, { "epoch": 0.9570742349721547, "grad_norm": 1.6015625, "learning_rate": 5.5895105647913716e-05, "loss": 6.9076, "step": 16670 }, { "epoch": 0.9576483646871949, "grad_norm": 1.640625, "learning_rate": 5.58489375928141e-05, "loss": 6.9448, "step": 16680 }, { "epoch": 0.9582224944022353, "grad_norm": 1.546875, "learning_rate": 5.580276448119447e-05, "loss": 6.8704, "step": 16690 }, { "epoch": 0.9587966241172755, "grad_norm": 1.5625, "learning_rate": 5.5756586352972374e-05, "loss": 6.9062, "step": 16700 }, { "epoch": 0.9593707538323158, "grad_norm": 1.6171875, "learning_rate": 5.571040324806969e-05, "loss": 6.9017, "step": 16710 }, { "epoch": 0.9599448835473562, "grad_norm": 1.6328125, "learning_rate": 5.566421520641263e-05, "loss": 6.8891, "step": 16720 }, { "epoch": 0.9605190132623964, "grad_norm": 1.609375, "learning_rate": 5.561802226793165e-05, "loss": 6.9133, "step": 16730 }, { "epoch": 0.9610931429774368, "grad_norm": 1.609375, "learning_rate": 5.557182447256142e-05, "loss": 6.865, "step": 16740 }, { "epoch": 0.961667272692477, "grad_norm": 1.59375, "learning_rate": 5.552562186024084e-05, "loss": 6.8862, "step": 16750 }, { "epoch": 0.9622414024075173, "grad_norm": 1.5703125, "learning_rate": 5.547941447091297e-05, "loss": 6.9085, "step": 16760 }, { "epoch": 0.9628155321225575, "grad_norm": 1.6328125, "learning_rate": 5.5433202344525e-05, "loss": 6.8982, "step": 16770 }, { "epoch": 0.9633896618375979, "grad_norm": 1.6015625, "learning_rate": 5.53869855210282e-05, "loss": 6.9161, "step": 16780 }, { "epoch": 0.9639637915526381, "grad_norm": 1.6328125, "learning_rate": 5.53407640403779e-05, "loss": 6.8866, "step": 16790 }, { "epoch": 0.9645379212676785, "grad_norm": 1.5546875, "learning_rate": 5.52945379425335e-05, "loss": 6.9069, "step": 16800 }, { "epoch": 0.9651120509827187, "grad_norm": 1.578125, "learning_rate": 5.5248307267458334e-05, "loss": 6.9002, "step": 16810 }, { "epoch": 0.965686180697759, "grad_norm": 1.640625, "learning_rate": 5.5202072055119715e-05, "loss": 6.8604, "step": 16820 }, { "epoch": 0.9662603104127993, "grad_norm": 1.5, "learning_rate": 5.5155832345488875e-05, "loss": 6.8671, "step": 16830 }, { "epoch": 0.9668344401278396, "grad_norm": 1.71875, "learning_rate": 5.510958817854097e-05, "loss": 6.9308, "step": 16840 }, { "epoch": 0.9674085698428798, "grad_norm": 1.546875, "learning_rate": 5.506333959425497e-05, "loss": 6.9129, "step": 16850 }, { "epoch": 0.9679826995579202, "grad_norm": 1.625, "learning_rate": 5.501708663261366e-05, "loss": 6.8769, "step": 16860 }, { "epoch": 0.9685568292729604, "grad_norm": 1.53125, "learning_rate": 5.49708293336036e-05, "loss": 6.8734, "step": 16870 }, { "epoch": 0.9691309589880007, "grad_norm": 1.53125, "learning_rate": 5.492456773721517e-05, "loss": 6.9219, "step": 16880 }, { "epoch": 0.969705088703041, "grad_norm": 1.5625, "learning_rate": 5.4878301883442396e-05, "loss": 6.8936, "step": 16890 }, { "epoch": 0.9702792184180813, "grad_norm": 1.5625, "learning_rate": 5.483203181228301e-05, "loss": 6.8887, "step": 16900 }, { "epoch": 0.9708533481331215, "grad_norm": 1.671875, "learning_rate": 5.4785757563738396e-05, "loss": 6.9107, "step": 16910 }, { "epoch": 0.9714274778481619, "grad_norm": 1.625, "learning_rate": 5.4739479177813516e-05, "loss": 6.9171, "step": 16920 }, { "epoch": 0.9720016075632021, "grad_norm": 1.59375, "learning_rate": 5.469319669451692e-05, "loss": 6.9316, "step": 16930 }, { "epoch": 0.9725757372782424, "grad_norm": 1.5390625, "learning_rate": 5.4646910153860764e-05, "loss": 6.8953, "step": 16940 }, { "epoch": 0.9731498669932827, "grad_norm": 1.71875, "learning_rate": 5.460061959586063e-05, "loss": 6.8968, "step": 16950 }, { "epoch": 0.973723996708323, "grad_norm": 1.5625, "learning_rate": 5.455432506053562e-05, "loss": 6.8586, "step": 16960 }, { "epoch": 0.9742981264233632, "grad_norm": 1.6484375, "learning_rate": 5.450802658790821e-05, "loss": 6.8524, "step": 16970 }, { "epoch": 0.9748722561384036, "grad_norm": 1.6171875, "learning_rate": 5.4461724218004386e-05, "loss": 6.9294, "step": 16980 }, { "epoch": 0.9754463858534438, "grad_norm": 1.546875, "learning_rate": 5.441541799085341e-05, "loss": 6.8962, "step": 16990 }, { "epoch": 0.9760205155684841, "grad_norm": 1.625, "learning_rate": 5.436910794648794e-05, "loss": 6.8631, "step": 17000 }, { "epoch": 0.9765946452835244, "grad_norm": 1.5859375, "learning_rate": 5.432279412494386e-05, "loss": 6.8757, "step": 17010 }, { "epoch": 0.9771687749985647, "grad_norm": 1.5390625, "learning_rate": 5.4276476566260426e-05, "loss": 6.9315, "step": 17020 }, { "epoch": 0.9777429047136049, "grad_norm": 1.609375, "learning_rate": 5.423015531048003e-05, "loss": 6.9108, "step": 17030 }, { "epoch": 0.9783170344286453, "grad_norm": 1.546875, "learning_rate": 5.418383039764833e-05, "loss": 6.851, "step": 17040 }, { "epoch": 0.9788911641436855, "grad_norm": 1.5625, "learning_rate": 5.413750186781406e-05, "loss": 6.8727, "step": 17050 }, { "epoch": 0.9794652938587258, "grad_norm": 1.578125, "learning_rate": 5.409116976102916e-05, "loss": 6.8615, "step": 17060 }, { "epoch": 0.980039423573766, "grad_norm": 1.6875, "learning_rate": 5.4044834117348643e-05, "loss": 6.8752, "step": 17070 }, { "epoch": 0.9806135532888064, "grad_norm": 1.6640625, "learning_rate": 5.3998494976830574e-05, "loss": 6.8833, "step": 17080 }, { "epoch": 0.9811876830038466, "grad_norm": 1.5859375, "learning_rate": 5.395215237953601e-05, "loss": 6.8956, "step": 17090 }, { "epoch": 0.981761812718887, "grad_norm": 1.59375, "learning_rate": 5.390580636552904e-05, "loss": 6.8642, "step": 17100 }, { "epoch": 0.9823359424339272, "grad_norm": 1.5234375, "learning_rate": 5.385945697487672e-05, "loss": 6.9134, "step": 17110 }, { "epoch": 0.9829100721489675, "grad_norm": 1.578125, "learning_rate": 5.3813104247648973e-05, "loss": 6.8912, "step": 17120 }, { "epoch": 0.9834842018640078, "grad_norm": 1.609375, "learning_rate": 5.376674822391861e-05, "loss": 6.8854, "step": 17130 }, { "epoch": 0.9840583315790481, "grad_norm": 1.640625, "learning_rate": 5.372038894376135e-05, "loss": 6.9169, "step": 17140 }, { "epoch": 0.9846324612940883, "grad_norm": 1.7265625, "learning_rate": 5.367402644725566e-05, "loss": 6.8841, "step": 17150 }, { "epoch": 0.9852065910091287, "grad_norm": 1.5234375, "learning_rate": 5.3627660774482846e-05, "loss": 6.9321, "step": 17160 }, { "epoch": 0.985780720724169, "grad_norm": 1.5390625, "learning_rate": 5.3581291965526924e-05, "loss": 6.9105, "step": 17170 }, { "epoch": 0.9863548504392092, "grad_norm": 1.5, "learning_rate": 5.353492006047461e-05, "loss": 6.9344, "step": 17180 }, { "epoch": 0.9869289801542496, "grad_norm": 1.6171875, "learning_rate": 5.348854509941533e-05, "loss": 6.8562, "step": 17190 }, { "epoch": 0.9875031098692898, "grad_norm": 1.6640625, "learning_rate": 5.3442167122441145e-05, "loss": 6.9075, "step": 17200 }, { "epoch": 0.9880772395843301, "grad_norm": 1.75, "learning_rate": 5.33957861696467e-05, "loss": 6.894, "step": 17210 }, { "epoch": 0.9886513692993704, "grad_norm": 1.5859375, "learning_rate": 5.3349402281129246e-05, "loss": 6.8863, "step": 17220 }, { "epoch": 0.9892254990144107, "grad_norm": 1.59375, "learning_rate": 5.330301549698853e-05, "loss": 6.8475, "step": 17230 }, { "epoch": 0.9897996287294509, "grad_norm": 1.59375, "learning_rate": 5.325662585732683e-05, "loss": 6.8824, "step": 17240 }, { "epoch": 0.9903737584444913, "grad_norm": 1.65625, "learning_rate": 5.321023340224893e-05, "loss": 6.8266, "step": 17250 }, { "epoch": 0.9909478881595315, "grad_norm": 1.6796875, "learning_rate": 5.316383817186196e-05, "loss": 6.8925, "step": 17260 }, { "epoch": 0.9915220178745718, "grad_norm": 1.671875, "learning_rate": 5.3117440206275504e-05, "loss": 6.916, "step": 17270 }, { "epoch": 0.9920961475896121, "grad_norm": 1.5859375, "learning_rate": 5.307103954560153e-05, "loss": 6.9125, "step": 17280 }, { "epoch": 0.9926702773046524, "grad_norm": 1.484375, "learning_rate": 5.302463622995429e-05, "loss": 6.8616, "step": 17290 }, { "epoch": 0.9932444070196926, "grad_norm": 1.5859375, "learning_rate": 5.297823029945036e-05, "loss": 6.9259, "step": 17300 }, { "epoch": 0.993818536734733, "grad_norm": 1.546875, "learning_rate": 5.293182179420855e-05, "loss": 6.8977, "step": 17310 }, { "epoch": 0.9943926664497732, "grad_norm": 1.609375, "learning_rate": 5.288541075434992e-05, "loss": 6.9003, "step": 17320 }, { "epoch": 0.9949667961648135, "grad_norm": 1.671875, "learning_rate": 5.283899721999772e-05, "loss": 6.8966, "step": 17330 }, { "epoch": 0.9955409258798538, "grad_norm": 1.5703125, "learning_rate": 5.279258123127735e-05, "loss": 6.9307, "step": 17340 }, { "epoch": 0.9961150555948941, "grad_norm": 1.6171875, "learning_rate": 5.2746162828316334e-05, "loss": 6.9119, "step": 17350 }, { "epoch": 0.9966891853099343, "grad_norm": 1.5703125, "learning_rate": 5.269974205124426e-05, "loss": 6.8631, "step": 17360 }, { "epoch": 0.9972633150249747, "grad_norm": 1.578125, "learning_rate": 5.265331894019283e-05, "loss": 6.8766, "step": 17370 }, { "epoch": 0.9978374447400149, "grad_norm": 1.546875, "learning_rate": 5.260689353529571e-05, "loss": 6.8461, "step": 17380 }, { "epoch": 0.9984115744550552, "grad_norm": 1.609375, "learning_rate": 5.256046587668855e-05, "loss": 6.8429, "step": 17390 }, { "epoch": 0.9989857041700955, "grad_norm": 1.671875, "learning_rate": 5.251403600450895e-05, "loss": 6.8993, "step": 17400 }, { "epoch": 0.9995598338851358, "grad_norm": 1.5546875, "learning_rate": 5.246760395889646e-05, "loss": 6.8517, "step": 17410 }, { "epoch": 1.0001148259430082, "grad_norm": 1.5078125, "learning_rate": 5.2421169779992486e-05, "loss": 6.8916, "step": 17420 }, { "epoch": 1.0006889556580483, "grad_norm": 1.6015625, "learning_rate": 5.237473350794026e-05, "loss": 6.8312, "step": 17430 }, { "epoch": 1.0012630853730886, "grad_norm": 1.59375, "learning_rate": 5.232829518288487e-05, "loss": 6.8256, "step": 17440 }, { "epoch": 1.001837215088129, "grad_norm": 1.5390625, "learning_rate": 5.22818548449731e-05, "loss": 6.8157, "step": 17450 }, { "epoch": 1.0024113448031693, "grad_norm": 1.578125, "learning_rate": 5.223541253435356e-05, "loss": 6.8387, "step": 17460 }, { "epoch": 1.0029854745182094, "grad_norm": 1.7421875, "learning_rate": 5.2188968291176524e-05, "loss": 6.7973, "step": 17470 }, { "epoch": 1.0035596042332497, "grad_norm": 1.5703125, "learning_rate": 5.214252215559393e-05, "loss": 6.8648, "step": 17480 }, { "epoch": 1.00413373394829, "grad_norm": 1.640625, "learning_rate": 5.209607416775937e-05, "loss": 6.8186, "step": 17490 }, { "epoch": 1.0047078636633304, "grad_norm": 1.6484375, "learning_rate": 5.204962436782802e-05, "loss": 6.8089, "step": 17500 }, { "epoch": 1.0052819933783705, "grad_norm": 1.53125, "learning_rate": 5.200317279595666e-05, "loss": 6.7777, "step": 17510 }, { "epoch": 1.0058561230934109, "grad_norm": 1.5625, "learning_rate": 5.1956719492303554e-05, "loss": 6.7948, "step": 17520 }, { "epoch": 1.0064302528084512, "grad_norm": 1.6015625, "learning_rate": 5.191026449702848e-05, "loss": 6.8556, "step": 17530 }, { "epoch": 1.0070043825234916, "grad_norm": 1.5546875, "learning_rate": 5.186380785029269e-05, "loss": 6.8099, "step": 17540 }, { "epoch": 1.0075785122385317, "grad_norm": 1.53125, "learning_rate": 5.181734959225886e-05, "loss": 6.8332, "step": 17550 }, { "epoch": 1.008152641953572, "grad_norm": 1.609375, "learning_rate": 5.177088976309106e-05, "loss": 6.8506, "step": 17560 }, { "epoch": 1.0087267716686124, "grad_norm": 1.5859375, "learning_rate": 5.1724428402954694e-05, "loss": 6.805, "step": 17570 }, { "epoch": 1.0093009013836527, "grad_norm": 1.6953125, "learning_rate": 5.1677965552016515e-05, "loss": 6.8179, "step": 17580 }, { "epoch": 1.0098750310986928, "grad_norm": 1.5625, "learning_rate": 5.163150125044458e-05, "loss": 6.8217, "step": 17590 }, { "epoch": 1.0104491608137331, "grad_norm": 1.5390625, "learning_rate": 5.1585035538408155e-05, "loss": 6.7778, "step": 17600 }, { "epoch": 1.0110232905287735, "grad_norm": 1.5078125, "learning_rate": 5.153856845607776e-05, "loss": 6.8724, "step": 17610 }, { "epoch": 1.0115974202438138, "grad_norm": 1.5859375, "learning_rate": 5.149210004362508e-05, "loss": 6.8117, "step": 17620 }, { "epoch": 1.012171549958854, "grad_norm": 1.609375, "learning_rate": 5.1445630341222984e-05, "loss": 6.7884, "step": 17630 }, { "epoch": 1.0127456796738943, "grad_norm": 1.5703125, "learning_rate": 5.1399159389045406e-05, "loss": 6.8375, "step": 17640 }, { "epoch": 1.0133198093889346, "grad_norm": 1.6171875, "learning_rate": 5.1352687227267395e-05, "loss": 6.8501, "step": 17650 }, { "epoch": 1.013893939103975, "grad_norm": 1.53125, "learning_rate": 5.1306213896065024e-05, "loss": 6.8114, "step": 17660 }, { "epoch": 1.014468068819015, "grad_norm": 1.5859375, "learning_rate": 5.12597394356154e-05, "loss": 6.8388, "step": 17670 }, { "epoch": 1.0150421985340554, "grad_norm": 1.5546875, "learning_rate": 5.121326388609661e-05, "loss": 6.8205, "step": 17680 }, { "epoch": 1.0156163282490958, "grad_norm": 1.59375, "learning_rate": 5.116678728768764e-05, "loss": 6.8352, "step": 17690 }, { "epoch": 1.016190457964136, "grad_norm": 1.609375, "learning_rate": 5.112030968056843e-05, "loss": 6.8312, "step": 17700 }, { "epoch": 1.0167645876791762, "grad_norm": 1.546875, "learning_rate": 5.107383110491978e-05, "loss": 6.8543, "step": 17710 }, { "epoch": 1.0173387173942166, "grad_norm": 1.625, "learning_rate": 5.102735160092329e-05, "loss": 6.7808, "step": 17720 }, { "epoch": 1.017912847109257, "grad_norm": 2.125, "learning_rate": 5.0980871208761427e-05, "loss": 6.815, "step": 17730 }, { "epoch": 1.0184869768242972, "grad_norm": 1.671875, "learning_rate": 5.0934389968617366e-05, "loss": 6.8204, "step": 17740 }, { "epoch": 1.0190611065393373, "grad_norm": 1.5859375, "learning_rate": 5.088790792067506e-05, "loss": 6.8204, "step": 17750 }, { "epoch": 1.0196352362543777, "grad_norm": 1.578125, "learning_rate": 5.084142510511911e-05, "loss": 6.7875, "step": 17760 }, { "epoch": 1.020209365969418, "grad_norm": 1.5703125, "learning_rate": 5.079494156213485e-05, "loss": 6.8449, "step": 17770 }, { "epoch": 1.0207834956844584, "grad_norm": 1.5234375, "learning_rate": 5.0748457331908186e-05, "loss": 6.7516, "step": 17780 }, { "epoch": 1.0213576253994985, "grad_norm": 1.53125, "learning_rate": 5.070197245462564e-05, "loss": 6.7899, "step": 17790 }, { "epoch": 1.0219317551145388, "grad_norm": 1.578125, "learning_rate": 5.065548697047429e-05, "loss": 6.7944, "step": 17800 }, { "epoch": 1.0225058848295792, "grad_norm": 1.5625, "learning_rate": 5.060900091964174e-05, "loss": 6.7772, "step": 17810 }, { "epoch": 1.0230800145446195, "grad_norm": 1.5859375, "learning_rate": 5.056251434231607e-05, "loss": 6.8124, "step": 17820 }, { "epoch": 1.0236541442596598, "grad_norm": 1.5234375, "learning_rate": 5.051602727868585e-05, "loss": 6.7964, "step": 17830 }, { "epoch": 1.0242282739747, "grad_norm": 1.5859375, "learning_rate": 5.046953976894002e-05, "loss": 6.8475, "step": 17840 }, { "epoch": 1.0248024036897403, "grad_norm": 1.59375, "learning_rate": 5.0423051853267965e-05, "loss": 6.8388, "step": 17850 }, { "epoch": 1.0253765334047806, "grad_norm": 1.5859375, "learning_rate": 5.037656357185938e-05, "loss": 6.824, "step": 17860 }, { "epoch": 1.025950663119821, "grad_norm": 1.5, "learning_rate": 5.0330074964904273e-05, "loss": 6.7823, "step": 17870 }, { "epoch": 1.026524792834861, "grad_norm": 1.640625, "learning_rate": 5.028358607259297e-05, "loss": 6.8141, "step": 17880 }, { "epoch": 1.0270989225499014, "grad_norm": 1.5390625, "learning_rate": 5.023709693511598e-05, "loss": 6.8081, "step": 17890 }, { "epoch": 1.0276730522649418, "grad_norm": 1.5703125, "learning_rate": 5.019060759266411e-05, "loss": 6.7904, "step": 17900 }, { "epoch": 1.028247181979982, "grad_norm": 1.5234375, "learning_rate": 5.014411808542827e-05, "loss": 6.8179, "step": 17910 }, { "epoch": 1.0288213116950222, "grad_norm": 1.5625, "learning_rate": 5.009762845359954e-05, "loss": 6.7808, "step": 17920 }, { "epoch": 1.0293954414100626, "grad_norm": 1.4765625, "learning_rate": 5.005113873736914e-05, "loss": 6.784, "step": 17930 }, { "epoch": 1.029969571125103, "grad_norm": 1.7890625, "learning_rate": 5.000464897692829e-05, "loss": 6.7981, "step": 17940 }, { "epoch": 1.0305437008401432, "grad_norm": 1.5, "learning_rate": 4.9958159212468335e-05, "loss": 6.8296, "step": 17950 }, { "epoch": 1.0311178305551834, "grad_norm": 1.46875, "learning_rate": 4.991166948418054e-05, "loss": 6.836, "step": 17960 }, { "epoch": 1.0316919602702237, "grad_norm": 1.5703125, "learning_rate": 4.9865179832256174e-05, "loss": 6.8217, "step": 17970 }, { "epoch": 1.032266089985264, "grad_norm": 1.53125, "learning_rate": 4.9818690296886475e-05, "loss": 6.86, "step": 17980 }, { "epoch": 1.0328402197003044, "grad_norm": 1.7421875, "learning_rate": 4.977220091826253e-05, "loss": 6.8151, "step": 17990 }, { "epoch": 1.0334143494153445, "grad_norm": 1.53125, "learning_rate": 4.972571173657531e-05, "loss": 6.7877, "step": 18000 }, { "epoch": 1.0339884791303848, "grad_norm": 1.4765625, "learning_rate": 4.96792227920156e-05, "loss": 6.7953, "step": 18010 }, { "epoch": 1.0345626088454252, "grad_norm": 1.578125, "learning_rate": 4.963273412477403e-05, "loss": 6.8301, "step": 18020 }, { "epoch": 1.0351367385604655, "grad_norm": 1.640625, "learning_rate": 4.9586245775040926e-05, "loss": 6.8222, "step": 18030 }, { "epoch": 1.0357108682755056, "grad_norm": 1.6015625, "learning_rate": 4.9539757783006376e-05, "loss": 6.805, "step": 18040 }, { "epoch": 1.036284997990546, "grad_norm": 1.5625, "learning_rate": 4.949327018886013e-05, "loss": 6.778, "step": 18050 }, { "epoch": 1.0368591277055863, "grad_norm": 1.5859375, "learning_rate": 4.944678303279166e-05, "loss": 6.7922, "step": 18060 }, { "epoch": 1.0374332574206266, "grad_norm": 1.515625, "learning_rate": 4.9400296354989974e-05, "loss": 6.8341, "step": 18070 }, { "epoch": 1.0380073871356668, "grad_norm": 1.6484375, "learning_rate": 4.935381019564374e-05, "loss": 6.853, "step": 18080 }, { "epoch": 1.038581516850707, "grad_norm": 1.6171875, "learning_rate": 4.930732459494113e-05, "loss": 6.8091, "step": 18090 }, { "epoch": 1.0391556465657474, "grad_norm": 1.5625, "learning_rate": 4.926083959306984e-05, "loss": 6.8279, "step": 18100 }, { "epoch": 1.0397297762807878, "grad_norm": 1.578125, "learning_rate": 4.9214355230217054e-05, "loss": 6.844, "step": 18110 }, { "epoch": 1.040303905995828, "grad_norm": 1.6171875, "learning_rate": 4.916787154656942e-05, "loss": 6.8196, "step": 18120 }, { "epoch": 1.0408780357108682, "grad_norm": 1.546875, "learning_rate": 4.912138858231297e-05, "loss": 6.8181, "step": 18130 }, { "epoch": 1.0414521654259086, "grad_norm": 1.609375, "learning_rate": 4.907490637763314e-05, "loss": 6.7709, "step": 18140 }, { "epoch": 1.042026295140949, "grad_norm": 1.5859375, "learning_rate": 4.90284249727147e-05, "loss": 6.7914, "step": 18150 }, { "epoch": 1.042600424855989, "grad_norm": 1.5546875, "learning_rate": 4.8981944407741704e-05, "loss": 6.7891, "step": 18160 }, { "epoch": 1.0431745545710294, "grad_norm": 1.5859375, "learning_rate": 4.8935464722897525e-05, "loss": 6.8074, "step": 18170 }, { "epoch": 1.0437486842860697, "grad_norm": 1.5703125, "learning_rate": 4.888898595836475e-05, "loss": 6.8193, "step": 18180 }, { "epoch": 1.04432281400111, "grad_norm": 1.5234375, "learning_rate": 4.884250815432516e-05, "loss": 6.8144, "step": 18190 }, { "epoch": 1.0448969437161502, "grad_norm": 1.625, "learning_rate": 4.879603135095972e-05, "loss": 6.8019, "step": 18200 }, { "epoch": 1.0454710734311905, "grad_norm": 1.546875, "learning_rate": 4.874955558844854e-05, "loss": 6.8075, "step": 18210 }, { "epoch": 1.0460452031462308, "grad_norm": 1.6015625, "learning_rate": 4.870308090697082e-05, "loss": 6.799, "step": 18220 }, { "epoch": 1.0466193328612712, "grad_norm": 1.546875, "learning_rate": 4.8656607346704805e-05, "loss": 6.7688, "step": 18230 }, { "epoch": 1.0471934625763115, "grad_norm": 1.65625, "learning_rate": 4.861013494782779e-05, "loss": 6.8544, "step": 18240 }, { "epoch": 1.0477675922913516, "grad_norm": 1.5703125, "learning_rate": 4.8563663750516094e-05, "loss": 6.7944, "step": 18250 }, { "epoch": 1.048341722006392, "grad_norm": 1.5390625, "learning_rate": 4.8517193794944945e-05, "loss": 6.8, "step": 18260 }, { "epoch": 1.0489158517214323, "grad_norm": 1.5625, "learning_rate": 4.847072512128852e-05, "loss": 6.8211, "step": 18270 }, { "epoch": 1.0494899814364727, "grad_norm": 1.5859375, "learning_rate": 4.8424257769719885e-05, "loss": 6.8798, "step": 18280 }, { "epoch": 1.0500641111515128, "grad_norm": 1.546875, "learning_rate": 4.8377791780411e-05, "loss": 6.7913, "step": 18290 }, { "epoch": 1.050638240866553, "grad_norm": 1.5390625, "learning_rate": 4.8331327193532585e-05, "loss": 6.8235, "step": 18300 }, { "epoch": 1.0512123705815934, "grad_norm": 1.6640625, "learning_rate": 4.828486404925418e-05, "loss": 6.822, "step": 18310 }, { "epoch": 1.0517865002966338, "grad_norm": 1.515625, "learning_rate": 4.823840238774408e-05, "loss": 6.7757, "step": 18320 }, { "epoch": 1.052360630011674, "grad_norm": 1.5546875, "learning_rate": 4.8191942249169266e-05, "loss": 6.8006, "step": 18330 }, { "epoch": 1.0529347597267142, "grad_norm": 1.578125, "learning_rate": 4.8145483673695476e-05, "loss": 6.7836, "step": 18340 }, { "epoch": 1.0535088894417546, "grad_norm": 1.6328125, "learning_rate": 4.8099026701487024e-05, "loss": 6.8307, "step": 18350 }, { "epoch": 1.054083019156795, "grad_norm": 1.5859375, "learning_rate": 4.8052571372706866e-05, "loss": 6.7965, "step": 18360 }, { "epoch": 1.054657148871835, "grad_norm": 1.515625, "learning_rate": 4.8006117727516525e-05, "loss": 6.8181, "step": 18370 }, { "epoch": 1.0552312785868754, "grad_norm": 1.6171875, "learning_rate": 4.79596658060761e-05, "loss": 6.851, "step": 18380 }, { "epoch": 1.0558054083019157, "grad_norm": 1.5625, "learning_rate": 4.791321564854417e-05, "loss": 6.8201, "step": 18390 }, { "epoch": 1.056379538016956, "grad_norm": 1.609375, "learning_rate": 4.786676729507779e-05, "loss": 6.8427, "step": 18400 }, { "epoch": 1.0569536677319962, "grad_norm": 1.640625, "learning_rate": 4.7820320785832454e-05, "loss": 6.7823, "step": 18410 }, { "epoch": 1.0575277974470365, "grad_norm": 1.640625, "learning_rate": 4.777387616096209e-05, "loss": 6.7862, "step": 18420 }, { "epoch": 1.0581019271620768, "grad_norm": 1.5546875, "learning_rate": 4.772743346061898e-05, "loss": 6.845, "step": 18430 }, { "epoch": 1.0586760568771172, "grad_norm": 1.6015625, "learning_rate": 4.768099272495373e-05, "loss": 6.8106, "step": 18440 }, { "epoch": 1.0592501865921573, "grad_norm": 1.5859375, "learning_rate": 4.763455399411524e-05, "loss": 6.8431, "step": 18450 }, { "epoch": 1.0598243163071976, "grad_norm": 1.546875, "learning_rate": 4.7588117308250725e-05, "loss": 6.8614, "step": 18460 }, { "epoch": 1.060398446022238, "grad_norm": 1.5625, "learning_rate": 4.7541682707505584e-05, "loss": 6.8251, "step": 18470 }, { "epoch": 1.0609725757372783, "grad_norm": 1.5859375, "learning_rate": 4.7495250232023435e-05, "loss": 6.863, "step": 18480 }, { "epoch": 1.0615467054523184, "grad_norm": 1.5078125, "learning_rate": 4.744881992194605e-05, "loss": 6.8241, "step": 18490 }, { "epoch": 1.0621208351673588, "grad_norm": 1.5, "learning_rate": 4.7402391817413324e-05, "loss": 6.8399, "step": 18500 }, { "epoch": 1.0626949648823991, "grad_norm": 1.6015625, "learning_rate": 4.735596595856327e-05, "loss": 6.8467, "step": 18510 }, { "epoch": 1.0632690945974395, "grad_norm": 1.578125, "learning_rate": 4.730954238553194e-05, "loss": 6.8012, "step": 18520 }, { "epoch": 1.0638432243124796, "grad_norm": 1.5390625, "learning_rate": 4.7263121138453406e-05, "loss": 6.7794, "step": 18530 }, { "epoch": 1.06441735402752, "grad_norm": 1.53125, "learning_rate": 4.721670225745974e-05, "loss": 6.7886, "step": 18540 }, { "epoch": 1.0649914837425603, "grad_norm": 1.5390625, "learning_rate": 4.717028578268097e-05, "loss": 6.8604, "step": 18550 }, { "epoch": 1.0655656134576006, "grad_norm": 1.5625, "learning_rate": 4.712387175424504e-05, "loss": 6.7841, "step": 18560 }, { "epoch": 1.0661397431726407, "grad_norm": 1.609375, "learning_rate": 4.7077460212277775e-05, "loss": 6.8609, "step": 18570 }, { "epoch": 1.066713872887681, "grad_norm": 1.6328125, "learning_rate": 4.703105119690283e-05, "loss": 6.8164, "step": 18580 }, { "epoch": 1.0672880026027214, "grad_norm": 1.65625, "learning_rate": 4.6984644748241744e-05, "loss": 6.817, "step": 18590 }, { "epoch": 1.0678621323177617, "grad_norm": 1.640625, "learning_rate": 4.693824090641375e-05, "loss": 6.8193, "step": 18600 }, { "epoch": 1.0684362620328018, "grad_norm": 1.5703125, "learning_rate": 4.689183971153591e-05, "loss": 6.7969, "step": 18610 }, { "epoch": 1.0690103917478422, "grad_norm": 1.6015625, "learning_rate": 4.6845441203722926e-05, "loss": 6.8536, "step": 18620 }, { "epoch": 1.0695845214628825, "grad_norm": 1.59375, "learning_rate": 4.6799045423087225e-05, "loss": 6.8249, "step": 18630 }, { "epoch": 1.0701586511779229, "grad_norm": 1.59375, "learning_rate": 4.675265240973883e-05, "loss": 6.8164, "step": 18640 }, { "epoch": 1.0707327808929632, "grad_norm": 1.4765625, "learning_rate": 4.670626220378545e-05, "loss": 6.8229, "step": 18650 }, { "epoch": 1.0713069106080033, "grad_norm": 1.5390625, "learning_rate": 4.665987484533229e-05, "loss": 6.8113, "step": 18660 }, { "epoch": 1.0718810403230437, "grad_norm": 1.5859375, "learning_rate": 4.661349037448211e-05, "loss": 6.8164, "step": 18670 }, { "epoch": 1.072455170038084, "grad_norm": 1.5859375, "learning_rate": 4.6567108831335196e-05, "loss": 6.8221, "step": 18680 }, { "epoch": 1.073029299753124, "grad_norm": 1.5546875, "learning_rate": 4.65207302559893e-05, "loss": 6.816, "step": 18690 }, { "epoch": 1.0736034294681644, "grad_norm": 1.6875, "learning_rate": 4.6474354688539596e-05, "loss": 6.8139, "step": 18700 }, { "epoch": 1.0741775591832048, "grad_norm": 1.6015625, "learning_rate": 4.642798216907866e-05, "loss": 6.8234, "step": 18710 }, { "epoch": 1.0747516888982451, "grad_norm": 1.546875, "learning_rate": 4.638161273769643e-05, "loss": 6.8128, "step": 18720 }, { "epoch": 1.0753258186132855, "grad_norm": 1.5078125, "learning_rate": 4.633524643448017e-05, "loss": 6.7798, "step": 18730 }, { "epoch": 1.0758999483283256, "grad_norm": 1.5625, "learning_rate": 4.6288883299514486e-05, "loss": 6.8403, "step": 18740 }, { "epoch": 1.076474078043366, "grad_norm": 1.59375, "learning_rate": 4.624252337288117e-05, "loss": 6.8104, "step": 18750 }, { "epoch": 1.0770482077584063, "grad_norm": 1.6484375, "learning_rate": 4.619616669465929e-05, "loss": 6.8176, "step": 18760 }, { "epoch": 1.0776223374734466, "grad_norm": 1.546875, "learning_rate": 4.614981330492509e-05, "loss": 6.8101, "step": 18770 }, { "epoch": 1.0781964671884867, "grad_norm": 1.5859375, "learning_rate": 4.6103463243751995e-05, "loss": 6.7995, "step": 18780 }, { "epoch": 1.078770596903527, "grad_norm": 1.5625, "learning_rate": 4.6057116551210506e-05, "loss": 6.8148, "step": 18790 }, { "epoch": 1.0793447266185674, "grad_norm": 1.578125, "learning_rate": 4.601077326736825e-05, "loss": 6.8365, "step": 18800 }, { "epoch": 1.0799188563336077, "grad_norm": 1.5625, "learning_rate": 4.5964433432289885e-05, "loss": 6.8396, "step": 18810 }, { "epoch": 1.0804929860486479, "grad_norm": 1.5859375, "learning_rate": 4.5918097086037116e-05, "loss": 6.7852, "step": 18820 }, { "epoch": 1.0810671157636882, "grad_norm": 1.546875, "learning_rate": 4.5871764268668603e-05, "loss": 6.7954, "step": 18830 }, { "epoch": 1.0816412454787285, "grad_norm": 1.6640625, "learning_rate": 4.582543502023996e-05, "loss": 6.8054, "step": 18840 }, { "epoch": 1.0822153751937689, "grad_norm": 1.6875, "learning_rate": 4.577910938080372e-05, "loss": 6.8172, "step": 18850 }, { "epoch": 1.082789504908809, "grad_norm": 1.609375, "learning_rate": 4.573278739040932e-05, "loss": 6.826, "step": 18860 }, { "epoch": 1.0833636346238493, "grad_norm": 1.546875, "learning_rate": 4.568646908910299e-05, "loss": 6.8217, "step": 18870 }, { "epoch": 1.0839377643388897, "grad_norm": 1.59375, "learning_rate": 4.564015451692782e-05, "loss": 6.7828, "step": 18880 }, { "epoch": 1.08451189405393, "grad_norm": 1.625, "learning_rate": 4.5593843713923645e-05, "loss": 6.7962, "step": 18890 }, { "epoch": 1.0850860237689701, "grad_norm": 1.578125, "learning_rate": 4.5547536720127045e-05, "loss": 6.8068, "step": 18900 }, { "epoch": 1.0856601534840105, "grad_norm": 1.5078125, "learning_rate": 4.5501233575571337e-05, "loss": 6.831, "step": 18910 }, { "epoch": 1.0862342831990508, "grad_norm": 1.6171875, "learning_rate": 4.545493432028648e-05, "loss": 6.8396, "step": 18920 }, { "epoch": 1.0868084129140911, "grad_norm": 1.6953125, "learning_rate": 4.5408638994299066e-05, "loss": 6.8095, "step": 18930 }, { "epoch": 1.0873825426291313, "grad_norm": 1.6796875, "learning_rate": 4.536234763763231e-05, "loss": 6.7861, "step": 18940 }, { "epoch": 1.0879566723441716, "grad_norm": 1.640625, "learning_rate": 4.5316060290305996e-05, "loss": 6.7677, "step": 18950 }, { "epoch": 1.088530802059212, "grad_norm": 1.5703125, "learning_rate": 4.526977699233643e-05, "loss": 6.7941, "step": 18960 }, { "epoch": 1.0891049317742523, "grad_norm": 1.578125, "learning_rate": 4.522349778373641e-05, "loss": 6.7881, "step": 18970 }, { "epoch": 1.0896790614892924, "grad_norm": 1.5546875, "learning_rate": 4.517722270451521e-05, "loss": 6.8236, "step": 18980 }, { "epoch": 1.0902531912043327, "grad_norm": 1.578125, "learning_rate": 4.513095179467855e-05, "loss": 6.8218, "step": 18990 }, { "epoch": 1.090827320919373, "grad_norm": 1.640625, "learning_rate": 4.5084685094228524e-05, "loss": 6.8067, "step": 19000 }, { "epoch": 1.0914014506344134, "grad_norm": 1.5859375, "learning_rate": 4.503842264316359e-05, "loss": 6.8452, "step": 19010 }, { "epoch": 1.0919755803494535, "grad_norm": 1.546875, "learning_rate": 4.499216448147852e-05, "loss": 6.8308, "step": 19020 }, { "epoch": 1.0925497100644939, "grad_norm": 1.6640625, "learning_rate": 4.494591064916441e-05, "loss": 6.8074, "step": 19030 }, { "epoch": 1.0931238397795342, "grad_norm": 1.5859375, "learning_rate": 4.489966118620859e-05, "loss": 6.7477, "step": 19040 }, { "epoch": 1.0936979694945745, "grad_norm": 1.5625, "learning_rate": 4.485341613259462e-05, "loss": 6.8247, "step": 19050 }, { "epoch": 1.0942720992096147, "grad_norm": 1.6640625, "learning_rate": 4.4807175528302234e-05, "loss": 6.808, "step": 19060 }, { "epoch": 1.094846228924655, "grad_norm": 1.5703125, "learning_rate": 4.4760939413307355e-05, "loss": 6.8517, "step": 19070 }, { "epoch": 1.0954203586396953, "grad_norm": 1.578125, "learning_rate": 4.4714707827581994e-05, "loss": 6.8317, "step": 19080 }, { "epoch": 1.0959944883547357, "grad_norm": 1.546875, "learning_rate": 4.466848081109424e-05, "loss": 6.833, "step": 19090 }, { "epoch": 1.0965686180697758, "grad_norm": 1.625, "learning_rate": 4.4622258403808226e-05, "loss": 6.8285, "step": 19100 }, { "epoch": 1.0971427477848161, "grad_norm": 1.546875, "learning_rate": 4.4576040645684174e-05, "loss": 6.8295, "step": 19110 }, { "epoch": 1.0977168774998565, "grad_norm": 1.6171875, "learning_rate": 4.452982757667821e-05, "loss": 6.7981, "step": 19120 }, { "epoch": 1.0982910072148968, "grad_norm": 1.5234375, "learning_rate": 4.44836192367424e-05, "loss": 6.8319, "step": 19130 }, { "epoch": 1.0988651369299371, "grad_norm": 1.640625, "learning_rate": 4.44374156658248e-05, "loss": 6.8486, "step": 19140 }, { "epoch": 1.0994392666449773, "grad_norm": 1.6328125, "learning_rate": 4.439121690386926e-05, "loss": 6.7847, "step": 19150 }, { "epoch": 1.1000133963600176, "grad_norm": 1.578125, "learning_rate": 4.434502299081551e-05, "loss": 6.7814, "step": 19160 }, { "epoch": 1.100587526075058, "grad_norm": 1.6328125, "learning_rate": 4.429883396659908e-05, "loss": 6.7948, "step": 19170 }, { "epoch": 1.101161655790098, "grad_norm": 1.546875, "learning_rate": 4.42526498711513e-05, "loss": 6.7628, "step": 19180 }, { "epoch": 1.1017357855051384, "grad_norm": 1.484375, "learning_rate": 4.42064707443992e-05, "loss": 6.8418, "step": 19190 }, { "epoch": 1.1023099152201787, "grad_norm": 1.578125, "learning_rate": 4.416029662626553e-05, "loss": 6.7977, "step": 19200 }, { "epoch": 1.102884044935219, "grad_norm": 1.6171875, "learning_rate": 4.41141275566687e-05, "loss": 6.7637, "step": 19210 }, { "epoch": 1.1034581746502594, "grad_norm": 1.609375, "learning_rate": 4.40679635755228e-05, "loss": 6.8116, "step": 19220 }, { "epoch": 1.1040323043652995, "grad_norm": 1.6171875, "learning_rate": 4.4021804722737466e-05, "loss": 6.8296, "step": 19230 }, { "epoch": 1.1046064340803399, "grad_norm": 1.6015625, "learning_rate": 4.3975651038217916e-05, "loss": 6.7715, "step": 19240 }, { "epoch": 1.1051805637953802, "grad_norm": 1.59375, "learning_rate": 4.39295025618649e-05, "loss": 6.821, "step": 19250 }, { "epoch": 1.1057546935104205, "grad_norm": 1.46875, "learning_rate": 4.3883359333574695e-05, "loss": 6.8011, "step": 19260 }, { "epoch": 1.1063288232254607, "grad_norm": 1.5859375, "learning_rate": 4.3837221393239015e-05, "loss": 6.8172, "step": 19270 }, { "epoch": 1.106902952940501, "grad_norm": 1.4921875, "learning_rate": 4.3791088780744984e-05, "loss": 6.7852, "step": 19280 }, { "epoch": 1.1074770826555413, "grad_norm": 1.5390625, "learning_rate": 4.374496153597514e-05, "loss": 6.8689, "step": 19290 }, { "epoch": 1.1080512123705817, "grad_norm": 1.5390625, "learning_rate": 4.3698839698807415e-05, "loss": 6.7963, "step": 19300 }, { "epoch": 1.1086253420856218, "grad_norm": 1.609375, "learning_rate": 4.3652723309115e-05, "loss": 6.7908, "step": 19310 }, { "epoch": 1.1091994718006621, "grad_norm": 1.5703125, "learning_rate": 4.360661240676642e-05, "loss": 6.8105, "step": 19320 }, { "epoch": 1.1097736015157025, "grad_norm": 1.609375, "learning_rate": 4.3560507031625435e-05, "loss": 6.7767, "step": 19330 }, { "epoch": 1.1103477312307428, "grad_norm": 1.53125, "learning_rate": 4.351440722355104e-05, "loss": 6.7945, "step": 19340 }, { "epoch": 1.110921860945783, "grad_norm": 1.6171875, "learning_rate": 4.346831302239743e-05, "loss": 6.8243, "step": 19350 }, { "epoch": 1.1114959906608233, "grad_norm": 1.5859375, "learning_rate": 4.342222446801392e-05, "loss": 6.8118, "step": 19360 }, { "epoch": 1.1120701203758636, "grad_norm": 1.5234375, "learning_rate": 4.3376141600244957e-05, "loss": 6.822, "step": 19370 }, { "epoch": 1.112644250090904, "grad_norm": 1.6015625, "learning_rate": 4.3330064458930076e-05, "loss": 6.8116, "step": 19380 }, { "epoch": 1.113218379805944, "grad_norm": 1.5078125, "learning_rate": 4.328399308390387e-05, "loss": 6.8233, "step": 19390 }, { "epoch": 1.1137925095209844, "grad_norm": 1.6015625, "learning_rate": 4.323792751499593e-05, "loss": 6.8053, "step": 19400 }, { "epoch": 1.1143666392360247, "grad_norm": 1.5234375, "learning_rate": 4.3191867792030834e-05, "loss": 6.8136, "step": 19410 }, { "epoch": 1.114940768951065, "grad_norm": 1.7109375, "learning_rate": 4.314581395482809e-05, "loss": 6.7782, "step": 19420 }, { "epoch": 1.1155148986661052, "grad_norm": 1.6015625, "learning_rate": 4.309976604320217e-05, "loss": 6.7782, "step": 19430 }, { "epoch": 1.1160890283811455, "grad_norm": 1.59375, "learning_rate": 4.305372409696236e-05, "loss": 6.8324, "step": 19440 }, { "epoch": 1.1166631580961859, "grad_norm": 1.546875, "learning_rate": 4.300768815591282e-05, "loss": 6.8208, "step": 19450 }, { "epoch": 1.1172372878112262, "grad_norm": 1.59375, "learning_rate": 4.296165825985251e-05, "loss": 6.8429, "step": 19460 }, { "epoch": 1.1178114175262663, "grad_norm": 1.5625, "learning_rate": 4.2915634448575184e-05, "loss": 6.8359, "step": 19470 }, { "epoch": 1.1183855472413067, "grad_norm": 1.6171875, "learning_rate": 4.2869616761869304e-05, "loss": 6.818, "step": 19480 }, { "epoch": 1.118959676956347, "grad_norm": 1.53125, "learning_rate": 4.282360523951806e-05, "loss": 6.7769, "step": 19490 }, { "epoch": 1.1195338066713874, "grad_norm": 1.578125, "learning_rate": 4.2777599921299304e-05, "loss": 6.7761, "step": 19500 }, { "epoch": 1.1201079363864275, "grad_norm": 1.5390625, "learning_rate": 4.273160084698552e-05, "loss": 6.7693, "step": 19510 }, { "epoch": 1.1206820661014678, "grad_norm": 1.515625, "learning_rate": 4.268560805634382e-05, "loss": 6.7573, "step": 19520 }, { "epoch": 1.1212561958165081, "grad_norm": 1.6015625, "learning_rate": 4.2639621589135845e-05, "loss": 6.8521, "step": 19530 }, { "epoch": 1.1218303255315485, "grad_norm": 1.5625, "learning_rate": 4.259364148511779e-05, "loss": 6.787, "step": 19540 }, { "epoch": 1.1224044552465888, "grad_norm": 1.5625, "learning_rate": 4.254766778404034e-05, "loss": 6.8294, "step": 19550 }, { "epoch": 1.122978584961629, "grad_norm": 1.515625, "learning_rate": 4.250170052564868e-05, "loss": 6.8143, "step": 19560 }, { "epoch": 1.1235527146766693, "grad_norm": 1.5390625, "learning_rate": 4.2455739749682374e-05, "loss": 6.8112, "step": 19570 }, { "epoch": 1.1241268443917096, "grad_norm": 1.5546875, "learning_rate": 4.24097854958754e-05, "loss": 6.8071, "step": 19580 }, { "epoch": 1.1247009741067497, "grad_norm": 1.515625, "learning_rate": 4.2363837803956115e-05, "loss": 6.8058, "step": 19590 }, { "epoch": 1.12527510382179, "grad_norm": 1.71875, "learning_rate": 4.2317896713647185e-05, "loss": 6.821, "step": 19600 }, { "epoch": 1.1258492335368304, "grad_norm": 1.609375, "learning_rate": 4.2271962264665575e-05, "loss": 6.8413, "step": 19610 }, { "epoch": 1.1264233632518708, "grad_norm": 1.5234375, "learning_rate": 4.222603449672249e-05, "loss": 6.7789, "step": 19620 }, { "epoch": 1.126997492966911, "grad_norm": 1.5078125, "learning_rate": 4.218011344952341e-05, "loss": 6.7947, "step": 19630 }, { "epoch": 1.1275716226819512, "grad_norm": 1.5859375, "learning_rate": 4.2134199162767956e-05, "loss": 6.8209, "step": 19640 }, { "epoch": 1.1281457523969916, "grad_norm": 1.5546875, "learning_rate": 4.208829167614991e-05, "loss": 6.7714, "step": 19650 }, { "epoch": 1.128719882112032, "grad_norm": 1.5546875, "learning_rate": 4.20423910293572e-05, "loss": 6.8094, "step": 19660 }, { "epoch": 1.129294011827072, "grad_norm": 1.4765625, "learning_rate": 4.199649726207181e-05, "loss": 6.8415, "step": 19670 }, { "epoch": 1.1298681415421123, "grad_norm": 1.546875, "learning_rate": 4.1950610413969814e-05, "loss": 6.7762, "step": 19680 }, { "epoch": 1.1304422712571527, "grad_norm": 1.5234375, "learning_rate": 4.190473052472125e-05, "loss": 6.8122, "step": 19690 }, { "epoch": 1.131016400972193, "grad_norm": 1.6875, "learning_rate": 4.1858857633990204e-05, "loss": 6.7658, "step": 19700 }, { "epoch": 1.1315905306872334, "grad_norm": 1.5390625, "learning_rate": 4.181299178143467e-05, "loss": 6.7999, "step": 19710 }, { "epoch": 1.1321646604022735, "grad_norm": 1.5703125, "learning_rate": 4.1767133006706555e-05, "loss": 6.8033, "step": 19720 }, { "epoch": 1.1327387901173138, "grad_norm": 1.5546875, "learning_rate": 4.172128134945167e-05, "loss": 6.8088, "step": 19730 }, { "epoch": 1.1333129198323542, "grad_norm": 1.59375, "learning_rate": 4.167543684930966e-05, "loss": 6.7741, "step": 19740 }, { "epoch": 1.1338870495473945, "grad_norm": 1.625, "learning_rate": 4.162959954591399e-05, "loss": 6.8355, "step": 19750 }, { "epoch": 1.1344611792624346, "grad_norm": 1.65625, "learning_rate": 4.1583769478891885e-05, "loss": 6.7529, "step": 19760 }, { "epoch": 1.135035308977475, "grad_norm": 1.5625, "learning_rate": 4.153794668786435e-05, "loss": 6.8089, "step": 19770 }, { "epoch": 1.1356094386925153, "grad_norm": 1.5546875, "learning_rate": 4.149213121244604e-05, "loss": 6.7903, "step": 19780 }, { "epoch": 1.1361835684075556, "grad_norm": 1.515625, "learning_rate": 4.144632309224536e-05, "loss": 6.8052, "step": 19790 }, { "epoch": 1.1367576981225957, "grad_norm": 1.625, "learning_rate": 4.1400522366864306e-05, "loss": 6.8578, "step": 19800 }, { "epoch": 1.137331827837636, "grad_norm": 1.6015625, "learning_rate": 4.135472907589849e-05, "loss": 6.8058, "step": 19810 }, { "epoch": 1.1379059575526764, "grad_norm": 1.4921875, "learning_rate": 4.130894325893708e-05, "loss": 6.8482, "step": 19820 }, { "epoch": 1.1384800872677168, "grad_norm": 1.546875, "learning_rate": 4.126316495556284e-05, "loss": 6.7568, "step": 19830 }, { "epoch": 1.1390542169827569, "grad_norm": 1.6796875, "learning_rate": 4.121739420535199e-05, "loss": 6.8415, "step": 19840 }, { "epoch": 1.1396283466977972, "grad_norm": 1.6015625, "learning_rate": 4.117163104787422e-05, "loss": 6.803, "step": 19850 }, { "epoch": 1.1402024764128376, "grad_norm": 1.609375, "learning_rate": 4.112587552269267e-05, "loss": 6.7807, "step": 19860 }, { "epoch": 1.140776606127878, "grad_norm": 1.4921875, "learning_rate": 4.108012766936389e-05, "loss": 6.8434, "step": 19870 }, { "epoch": 1.141350735842918, "grad_norm": 1.5546875, "learning_rate": 4.103438752743778e-05, "loss": 6.8533, "step": 19880 }, { "epoch": 1.1419248655579584, "grad_norm": 1.5390625, "learning_rate": 4.0988655136457583e-05, "loss": 6.8059, "step": 19890 }, { "epoch": 1.1424989952729987, "grad_norm": 1.625, "learning_rate": 4.094293053595983e-05, "loss": 6.7928, "step": 19900 }, { "epoch": 1.143073124988039, "grad_norm": 1.5546875, "learning_rate": 4.089721376547433e-05, "loss": 6.8311, "step": 19910 }, { "epoch": 1.1436472547030792, "grad_norm": 1.6015625, "learning_rate": 4.085150486452412e-05, "loss": 6.8427, "step": 19920 }, { "epoch": 1.1442213844181195, "grad_norm": 1.6953125, "learning_rate": 4.0805803872625434e-05, "loss": 6.7816, "step": 19930 }, { "epoch": 1.1447955141331598, "grad_norm": 1.6796875, "learning_rate": 4.076011082928766e-05, "loss": 6.8232, "step": 19940 }, { "epoch": 1.1453696438482002, "grad_norm": 1.6015625, "learning_rate": 4.071442577401331e-05, "loss": 6.7945, "step": 19950 }, { "epoch": 1.1459437735632405, "grad_norm": 1.5625, "learning_rate": 4.0668748746298026e-05, "loss": 6.813, "step": 19960 }, { "epoch": 1.1465179032782806, "grad_norm": 1.484375, "learning_rate": 4.062307978563047e-05, "loss": 6.8593, "step": 19970 }, { "epoch": 1.147092032993321, "grad_norm": 1.59375, "learning_rate": 4.057741893149234e-05, "loss": 6.81, "step": 19980 }, { "epoch": 1.1476661627083613, "grad_norm": 1.5859375, "learning_rate": 4.053176622335834e-05, "loss": 6.7886, "step": 19990 }, { "epoch": 1.1482402924234014, "grad_norm": 1.6171875, "learning_rate": 4.048612170069612e-05, "loss": 6.8227, "step": 20000 }, { "epoch": 1.1488144221384418, "grad_norm": 1.6328125, "learning_rate": 4.0440485402966254e-05, "loss": 6.8347, "step": 20010 }, { "epoch": 1.149388551853482, "grad_norm": 1.6484375, "learning_rate": 4.039485736962221e-05, "loss": 6.7834, "step": 20020 }, { "epoch": 1.1499626815685224, "grad_norm": 1.5859375, "learning_rate": 4.034923764011029e-05, "loss": 6.8269, "step": 20030 }, { "epoch": 1.1505368112835628, "grad_norm": 1.546875, "learning_rate": 4.0303626253869655e-05, "loss": 6.8141, "step": 20040 }, { "epoch": 1.151110940998603, "grad_norm": 1.578125, "learning_rate": 4.0258023250332235e-05, "loss": 6.7619, "step": 20050 }, { "epoch": 1.1516850707136432, "grad_norm": 1.59375, "learning_rate": 4.02124286689227e-05, "loss": 6.8245, "step": 20060 }, { "epoch": 1.1522592004286836, "grad_norm": 1.6953125, "learning_rate": 4.016684254905845e-05, "loss": 6.795, "step": 20070 }, { "epoch": 1.1528333301437237, "grad_norm": 1.5078125, "learning_rate": 4.012126493014957e-05, "loss": 6.7862, "step": 20080 }, { "epoch": 1.153407459858764, "grad_norm": 1.6171875, "learning_rate": 4.007569585159881e-05, "loss": 6.8271, "step": 20090 }, { "epoch": 1.1539815895738044, "grad_norm": 1.6171875, "learning_rate": 4.0030135352801505e-05, "loss": 6.7944, "step": 20100 }, { "epoch": 1.1545557192888447, "grad_norm": 1.6015625, "learning_rate": 3.99845834731456e-05, "loss": 6.8388, "step": 20110 }, { "epoch": 1.155129849003885, "grad_norm": 1.5859375, "learning_rate": 3.993904025201157e-05, "loss": 6.7978, "step": 20120 }, { "epoch": 1.1557039787189252, "grad_norm": 1.5859375, "learning_rate": 3.9893505728772423e-05, "loss": 6.7911, "step": 20130 }, { "epoch": 1.1562781084339655, "grad_norm": 1.6015625, "learning_rate": 3.984797994279363e-05, "loss": 6.8024, "step": 20140 }, { "epoch": 1.1568522381490058, "grad_norm": 1.5859375, "learning_rate": 3.9802462933433106e-05, "loss": 6.7964, "step": 20150 }, { "epoch": 1.1574263678640462, "grad_norm": 1.65625, "learning_rate": 3.975695474004123e-05, "loss": 6.7825, "step": 20160 }, { "epoch": 1.1580004975790863, "grad_norm": 1.5078125, "learning_rate": 3.9711455401960675e-05, "loss": 6.7821, "step": 20170 }, { "epoch": 1.1585746272941266, "grad_norm": 1.5234375, "learning_rate": 3.9665964958526516e-05, "loss": 6.8397, "step": 20180 }, { "epoch": 1.159148757009167, "grad_norm": 1.578125, "learning_rate": 3.962048344906612e-05, "loss": 6.7868, "step": 20190 }, { "epoch": 1.1597228867242073, "grad_norm": 1.546875, "learning_rate": 3.957501091289916e-05, "loss": 6.821, "step": 20200 }, { "epoch": 1.1602970164392474, "grad_norm": 1.6484375, "learning_rate": 3.95295473893375e-05, "loss": 6.8652, "step": 20210 }, { "epoch": 1.1608711461542878, "grad_norm": 1.59375, "learning_rate": 3.9484092917685214e-05, "loss": 6.8374, "step": 20220 }, { "epoch": 1.161445275869328, "grad_norm": 1.578125, "learning_rate": 3.943864753723863e-05, "loss": 6.7714, "step": 20230 }, { "epoch": 1.1620194055843684, "grad_norm": 1.5078125, "learning_rate": 3.939321128728613e-05, "loss": 6.8082, "step": 20240 }, { "epoch": 1.1625935352994086, "grad_norm": 1.5078125, "learning_rate": 3.934778420710824e-05, "loss": 6.8283, "step": 20250 }, { "epoch": 1.163167665014449, "grad_norm": 1.6328125, "learning_rate": 3.9302366335977535e-05, "loss": 6.8022, "step": 20260 }, { "epoch": 1.1637417947294892, "grad_norm": 1.5546875, "learning_rate": 3.925695771315867e-05, "loss": 6.7978, "step": 20270 }, { "epoch": 1.1643159244445296, "grad_norm": 1.546875, "learning_rate": 3.921155837790828e-05, "loss": 6.8335, "step": 20280 }, { "epoch": 1.1648900541595697, "grad_norm": 1.78125, "learning_rate": 3.916616836947495e-05, "loss": 6.7864, "step": 20290 }, { "epoch": 1.16546418387461, "grad_norm": 1.5234375, "learning_rate": 3.9120787727099226e-05, "loss": 6.7743, "step": 20300 }, { "epoch": 1.1660383135896504, "grad_norm": 1.5234375, "learning_rate": 3.9075416490013573e-05, "loss": 6.7906, "step": 20310 }, { "epoch": 1.1666124433046907, "grad_norm": 1.5703125, "learning_rate": 3.90300546974423e-05, "loss": 6.7976, "step": 20320 }, { "epoch": 1.1671865730197308, "grad_norm": 1.59375, "learning_rate": 3.8984702388601544e-05, "loss": 6.8192, "step": 20330 }, { "epoch": 1.1677607027347712, "grad_norm": 1.5078125, "learning_rate": 3.893935960269927e-05, "loss": 6.8099, "step": 20340 }, { "epoch": 1.1683348324498115, "grad_norm": 1.5234375, "learning_rate": 3.889402637893518e-05, "loss": 6.8522, "step": 20350 }, { "epoch": 1.1689089621648519, "grad_norm": 1.515625, "learning_rate": 3.8848702756500736e-05, "loss": 6.862, "step": 20360 }, { "epoch": 1.169483091879892, "grad_norm": 1.5234375, "learning_rate": 3.88033887745791e-05, "loss": 6.8081, "step": 20370 }, { "epoch": 1.1700572215949323, "grad_norm": 1.5546875, "learning_rate": 3.8758084472345064e-05, "loss": 6.8136, "step": 20380 }, { "epoch": 1.1706313513099726, "grad_norm": 1.515625, "learning_rate": 3.871278988896508e-05, "loss": 6.8325, "step": 20390 }, { "epoch": 1.171205481025013, "grad_norm": 1.578125, "learning_rate": 3.8667505063597215e-05, "loss": 6.8118, "step": 20400 }, { "epoch": 1.171779610740053, "grad_norm": 1.6640625, "learning_rate": 3.862223003539107e-05, "loss": 6.7854, "step": 20410 }, { "epoch": 1.1723537404550934, "grad_norm": 1.6171875, "learning_rate": 3.857696484348777e-05, "loss": 6.7856, "step": 20420 }, { "epoch": 1.1729278701701338, "grad_norm": 1.5625, "learning_rate": 3.853170952701996e-05, "loss": 6.8181, "step": 20430 }, { "epoch": 1.1735019998851741, "grad_norm": 1.5390625, "learning_rate": 3.848646412511175e-05, "loss": 6.7811, "step": 20440 }, { "epoch": 1.1740761296002145, "grad_norm": 1.625, "learning_rate": 3.844122867687867e-05, "loss": 6.8026, "step": 20450 }, { "epoch": 1.1746502593152546, "grad_norm": 1.640625, "learning_rate": 3.839600322142762e-05, "loss": 6.8377, "step": 20460 }, { "epoch": 1.175224389030295, "grad_norm": 1.5625, "learning_rate": 3.835078779785689e-05, "loss": 6.7971, "step": 20470 }, { "epoch": 1.1757985187453353, "grad_norm": 1.59375, "learning_rate": 3.830558244525611e-05, "loss": 6.7962, "step": 20480 }, { "epoch": 1.1763726484603754, "grad_norm": 1.6328125, "learning_rate": 3.826038720270616e-05, "loss": 6.8334, "step": 20490 }, { "epoch": 1.1769467781754157, "grad_norm": 1.546875, "learning_rate": 3.821520210927922e-05, "loss": 6.8429, "step": 20500 }, { "epoch": 1.177520907890456, "grad_norm": 1.5234375, "learning_rate": 3.817002720403868e-05, "loss": 6.8043, "step": 20510 }, { "epoch": 1.1780950376054964, "grad_norm": 1.5390625, "learning_rate": 3.812486252603909e-05, "loss": 6.8029, "step": 20520 }, { "epoch": 1.1786691673205367, "grad_norm": 1.6015625, "learning_rate": 3.807970811432625e-05, "loss": 6.8094, "step": 20530 }, { "epoch": 1.1792432970355768, "grad_norm": 1.5234375, "learning_rate": 3.803456400793698e-05, "loss": 6.7932, "step": 20540 }, { "epoch": 1.1798174267506172, "grad_norm": 1.578125, "learning_rate": 3.798943024589924e-05, "loss": 6.8151, "step": 20550 }, { "epoch": 1.1803915564656575, "grad_norm": 1.4765625, "learning_rate": 3.794430686723205e-05, "loss": 6.8063, "step": 20560 }, { "epoch": 1.1809656861806976, "grad_norm": 1.5625, "learning_rate": 3.789919391094546e-05, "loss": 6.8127, "step": 20570 }, { "epoch": 1.181539815895738, "grad_norm": 1.5546875, "learning_rate": 3.7854091416040475e-05, "loss": 6.8294, "step": 20580 }, { "epoch": 1.1821139456107783, "grad_norm": 1.5234375, "learning_rate": 3.780899942150908e-05, "loss": 6.8127, "step": 20590 }, { "epoch": 1.1826880753258187, "grad_norm": 1.6484375, "learning_rate": 3.776391796633418e-05, "loss": 6.8079, "step": 20600 }, { "epoch": 1.183262205040859, "grad_norm": 1.640625, "learning_rate": 3.7718847089489584e-05, "loss": 6.8004, "step": 20610 }, { "epoch": 1.1838363347558991, "grad_norm": 1.5390625, "learning_rate": 3.7673786829939924e-05, "loss": 6.7828, "step": 20620 }, { "epoch": 1.1844104644709395, "grad_norm": 1.6640625, "learning_rate": 3.762873722664067e-05, "loss": 6.8034, "step": 20630 }, { "epoch": 1.1849845941859798, "grad_norm": 1.6328125, "learning_rate": 3.758369831853806e-05, "loss": 6.8227, "step": 20640 }, { "epoch": 1.1855587239010201, "grad_norm": 1.46875, "learning_rate": 3.753867014456914e-05, "loss": 6.8206, "step": 20650 }, { "epoch": 1.1861328536160602, "grad_norm": 1.5234375, "learning_rate": 3.749365274366163e-05, "loss": 6.7616, "step": 20660 }, { "epoch": 1.1867069833311006, "grad_norm": 1.5546875, "learning_rate": 3.744864615473391e-05, "loss": 6.86, "step": 20670 }, { "epoch": 1.187281113046141, "grad_norm": 1.5859375, "learning_rate": 3.74036504166951e-05, "loss": 6.7983, "step": 20680 }, { "epoch": 1.1878552427611813, "grad_norm": 1.578125, "learning_rate": 3.7358665568444864e-05, "loss": 6.8181, "step": 20690 }, { "epoch": 1.1884293724762214, "grad_norm": 1.6171875, "learning_rate": 3.731369164887347e-05, "loss": 6.7961, "step": 20700 }, { "epoch": 1.1890035021912617, "grad_norm": 1.453125, "learning_rate": 3.726872869686176e-05, "loss": 6.8288, "step": 20710 }, { "epoch": 1.189577631906302, "grad_norm": 1.609375, "learning_rate": 3.722377675128108e-05, "loss": 6.8376, "step": 20720 }, { "epoch": 1.1901517616213424, "grad_norm": 1.6328125, "learning_rate": 3.717883585099324e-05, "loss": 6.7921, "step": 20730 }, { "epoch": 1.1907258913363825, "grad_norm": 1.515625, "learning_rate": 3.713390603485053e-05, "loss": 6.8358, "step": 20740 }, { "epoch": 1.1913000210514229, "grad_norm": 1.5703125, "learning_rate": 3.708898734169563e-05, "loss": 6.8181, "step": 20750 }, { "epoch": 1.1918741507664632, "grad_norm": 1.6484375, "learning_rate": 3.704407981036167e-05, "loss": 6.781, "step": 20760 }, { "epoch": 1.1924482804815035, "grad_norm": 1.59375, "learning_rate": 3.699918347967204e-05, "loss": 6.818, "step": 20770 }, { "epoch": 1.1930224101965436, "grad_norm": 1.5546875, "learning_rate": 3.6954298388440494e-05, "loss": 6.781, "step": 20780 }, { "epoch": 1.193596539911584, "grad_norm": 1.5234375, "learning_rate": 3.690942457547106e-05, "loss": 6.8087, "step": 20790 }, { "epoch": 1.1941706696266243, "grad_norm": 1.6640625, "learning_rate": 3.686456207955805e-05, "loss": 6.8211, "step": 20800 }, { "epoch": 1.1947447993416647, "grad_norm": 1.53125, "learning_rate": 3.681971093948594e-05, "loss": 6.8242, "step": 20810 }, { "epoch": 1.1953189290567048, "grad_norm": 1.625, "learning_rate": 3.677487119402941e-05, "loss": 6.7892, "step": 20820 }, { "epoch": 1.1958930587717451, "grad_norm": 1.578125, "learning_rate": 3.673004288195328e-05, "loss": 6.8204, "step": 20830 }, { "epoch": 1.1964671884867855, "grad_norm": 1.53125, "learning_rate": 3.668522604201252e-05, "loss": 6.822, "step": 20840 }, { "epoch": 1.1970413182018258, "grad_norm": 1.5390625, "learning_rate": 3.664042071295214e-05, "loss": 6.8165, "step": 20850 }, { "epoch": 1.1976154479168661, "grad_norm": 1.453125, "learning_rate": 3.659562693350723e-05, "loss": 6.8037, "step": 20860 }, { "epoch": 1.1981895776319063, "grad_norm": 1.5859375, "learning_rate": 3.655084474240286e-05, "loss": 6.8077, "step": 20870 }, { "epoch": 1.1987637073469466, "grad_norm": 1.4921875, "learning_rate": 3.650607417835412e-05, "loss": 6.839, "step": 20880 }, { "epoch": 1.199337837061987, "grad_norm": 1.53125, "learning_rate": 3.646131528006604e-05, "loss": 6.8457, "step": 20890 }, { "epoch": 1.199911966777027, "grad_norm": 1.5859375, "learning_rate": 3.641656808623353e-05, "loss": 6.8387, "step": 20900 }, { "epoch": 1.2004860964920674, "grad_norm": 1.6171875, "learning_rate": 3.637183263554143e-05, "loss": 6.8158, "step": 20910 }, { "epoch": 1.2010602262071077, "grad_norm": 1.640625, "learning_rate": 3.632710896666437e-05, "loss": 6.797, "step": 20920 }, { "epoch": 1.201634355922148, "grad_norm": 1.5859375, "learning_rate": 3.6282397118266876e-05, "loss": 6.8049, "step": 20930 }, { "epoch": 1.2022084856371884, "grad_norm": 1.625, "learning_rate": 3.623769712900319e-05, "loss": 6.7645, "step": 20940 }, { "epoch": 1.2027826153522285, "grad_norm": 1.625, "learning_rate": 3.6193009037517314e-05, "loss": 6.7906, "step": 20950 }, { "epoch": 1.2033567450672689, "grad_norm": 1.5, "learning_rate": 3.614833288244295e-05, "loss": 6.8026, "step": 20960 }, { "epoch": 1.2039308747823092, "grad_norm": 1.5546875, "learning_rate": 3.6103668702403546e-05, "loss": 6.8169, "step": 20970 }, { "epoch": 1.2045050044973493, "grad_norm": 1.546875, "learning_rate": 3.6059016536012124e-05, "loss": 6.8261, "step": 20980 }, { "epoch": 1.2050791342123897, "grad_norm": 1.6171875, "learning_rate": 3.601437642187135e-05, "loss": 6.8157, "step": 20990 }, { "epoch": 1.20565326392743, "grad_norm": 1.5625, "learning_rate": 3.5969748398573474e-05, "loss": 6.7973, "step": 21000 }, { "epoch": 1.2062273936424703, "grad_norm": 1.6171875, "learning_rate": 3.5925132504700286e-05, "loss": 6.7928, "step": 21010 }, { "epoch": 1.2068015233575107, "grad_norm": 1.5, "learning_rate": 3.58805287788231e-05, "loss": 6.8132, "step": 21020 }, { "epoch": 1.2073756530725508, "grad_norm": 1.5859375, "learning_rate": 3.583593725950268e-05, "loss": 6.8118, "step": 21030 }, { "epoch": 1.2079497827875911, "grad_norm": 1.5703125, "learning_rate": 3.5791357985289277e-05, "loss": 6.821, "step": 21040 }, { "epoch": 1.2085239125026315, "grad_norm": 1.6015625, "learning_rate": 3.5746790994722534e-05, "loss": 6.8024, "step": 21050 }, { "epoch": 1.2090980422176718, "grad_norm": 1.59375, "learning_rate": 3.570223632633148e-05, "loss": 6.8069, "step": 21060 }, { "epoch": 1.209672171932712, "grad_norm": 1.5390625, "learning_rate": 3.56576940186345e-05, "loss": 6.7979, "step": 21070 }, { "epoch": 1.2102463016477523, "grad_norm": 1.53125, "learning_rate": 3.5613164110139275e-05, "loss": 6.7888, "step": 21080 }, { "epoch": 1.2108204313627926, "grad_norm": 1.5859375, "learning_rate": 3.556864663934275e-05, "loss": 6.8396, "step": 21090 }, { "epoch": 1.211394561077833, "grad_norm": 1.609375, "learning_rate": 3.552414164473118e-05, "loss": 6.8034, "step": 21100 }, { "epoch": 1.211968690792873, "grad_norm": 1.515625, "learning_rate": 3.547964916477998e-05, "loss": 6.8231, "step": 21110 }, { "epoch": 1.2125428205079134, "grad_norm": 1.578125, "learning_rate": 3.543516923795377e-05, "loss": 6.8226, "step": 21120 }, { "epoch": 1.2131169502229537, "grad_norm": 1.6875, "learning_rate": 3.539070190270629e-05, "loss": 6.8056, "step": 21130 }, { "epoch": 1.213691079937994, "grad_norm": 1.5078125, "learning_rate": 3.534624719748043e-05, "loss": 6.7595, "step": 21140 }, { "epoch": 1.2142652096530342, "grad_norm": 1.5859375, "learning_rate": 3.530180516070815e-05, "loss": 6.7858, "step": 21150 }, { "epoch": 1.2148393393680745, "grad_norm": 1.5078125, "learning_rate": 3.525737583081044e-05, "loss": 6.8228, "step": 21160 }, { "epoch": 1.2154134690831149, "grad_norm": 1.5859375, "learning_rate": 3.521295924619731e-05, "loss": 6.7693, "step": 21170 }, { "epoch": 1.2159875987981552, "grad_norm": 1.578125, "learning_rate": 3.516855544526779e-05, "loss": 6.7931, "step": 21180 }, { "epoch": 1.2165617285131953, "grad_norm": 1.7109375, "learning_rate": 3.51241644664098e-05, "loss": 6.7883, "step": 21190 }, { "epoch": 1.2171358582282357, "grad_norm": 1.5078125, "learning_rate": 3.50797863480002e-05, "loss": 6.8149, "step": 21200 }, { "epoch": 1.217709987943276, "grad_norm": 1.484375, "learning_rate": 3.503542112840476e-05, "loss": 6.8273, "step": 21210 }, { "epoch": 1.2182841176583163, "grad_norm": 1.53125, "learning_rate": 3.4991068845978056e-05, "loss": 6.7913, "step": 21220 }, { "epoch": 1.2188582473733565, "grad_norm": 1.59375, "learning_rate": 3.494672953906349e-05, "loss": 6.7956, "step": 21230 }, { "epoch": 1.2194323770883968, "grad_norm": 1.53125, "learning_rate": 3.490240324599328e-05, "loss": 6.8015, "step": 21240 }, { "epoch": 1.2200065068034371, "grad_norm": 1.6171875, "learning_rate": 3.485809000508834e-05, "loss": 6.8228, "step": 21250 }, { "epoch": 1.2205806365184775, "grad_norm": 1.609375, "learning_rate": 3.481378985465833e-05, "loss": 6.8111, "step": 21260 }, { "epoch": 1.2211547662335176, "grad_norm": 1.6328125, "learning_rate": 3.476950283300159e-05, "loss": 6.779, "step": 21270 }, { "epoch": 1.221728895948558, "grad_norm": 1.5625, "learning_rate": 3.472522897840512e-05, "loss": 6.808, "step": 21280 }, { "epoch": 1.2223030256635983, "grad_norm": 1.5390625, "learning_rate": 3.468096832914452e-05, "loss": 6.801, "step": 21290 }, { "epoch": 1.2228771553786386, "grad_norm": 1.5, "learning_rate": 3.463672092348399e-05, "loss": 6.8156, "step": 21300 }, { "epoch": 1.2234512850936787, "grad_norm": 1.5625, "learning_rate": 3.4592486799676256e-05, "loss": 6.8023, "step": 21310 }, { "epoch": 1.224025414808719, "grad_norm": 1.6328125, "learning_rate": 3.454826599596256e-05, "loss": 6.7724, "step": 21320 }, { "epoch": 1.2245995445237594, "grad_norm": 1.6015625, "learning_rate": 3.450405855057268e-05, "loss": 6.8353, "step": 21330 }, { "epoch": 1.2251736742387997, "grad_norm": 1.640625, "learning_rate": 3.44598645017248e-05, "loss": 6.8009, "step": 21340 }, { "epoch": 1.22574780395384, "grad_norm": 1.6484375, "learning_rate": 3.441568388762553e-05, "loss": 6.8197, "step": 21350 }, { "epoch": 1.2263219336688802, "grad_norm": 1.640625, "learning_rate": 3.4371516746469847e-05, "loss": 6.7526, "step": 21360 }, { "epoch": 1.2268960633839205, "grad_norm": 1.546875, "learning_rate": 3.4327363116441136e-05, "loss": 6.8039, "step": 21370 }, { "epoch": 1.2274701930989609, "grad_norm": 1.515625, "learning_rate": 3.4283223035711045e-05, "loss": 6.842, "step": 21380 }, { "epoch": 1.228044322814001, "grad_norm": 1.5078125, "learning_rate": 3.423909654243954e-05, "loss": 6.8283, "step": 21390 }, { "epoch": 1.2286184525290413, "grad_norm": 1.4921875, "learning_rate": 3.4194983674774805e-05, "loss": 6.8216, "step": 21400 }, { "epoch": 1.2291925822440817, "grad_norm": 1.5234375, "learning_rate": 3.415088447085332e-05, "loss": 6.8379, "step": 21410 }, { "epoch": 1.229766711959122, "grad_norm": 1.609375, "learning_rate": 3.410679896879966e-05, "loss": 6.7836, "step": 21420 }, { "epoch": 1.2303408416741624, "grad_norm": 1.4765625, "learning_rate": 3.4062727206726606e-05, "loss": 6.817, "step": 21430 }, { "epoch": 1.2309149713892025, "grad_norm": 1.625, "learning_rate": 3.4018669222735054e-05, "loss": 6.7849, "step": 21440 }, { "epoch": 1.2314891011042428, "grad_norm": 1.5234375, "learning_rate": 3.3974625054914e-05, "loss": 6.8206, "step": 21450 }, { "epoch": 1.2320632308192832, "grad_norm": 1.5703125, "learning_rate": 3.393059474134047e-05, "loss": 6.7817, "step": 21460 }, { "epoch": 1.2326373605343233, "grad_norm": 1.53125, "learning_rate": 3.388657832007951e-05, "loss": 6.8132, "step": 21470 }, { "epoch": 1.2332114902493636, "grad_norm": 1.578125, "learning_rate": 3.384257582918418e-05, "loss": 6.8044, "step": 21480 }, { "epoch": 1.233785619964404, "grad_norm": 1.5390625, "learning_rate": 3.379858730669551e-05, "loss": 6.8424, "step": 21490 }, { "epoch": 1.2343597496794443, "grad_norm": 1.5546875, "learning_rate": 3.375461279064239e-05, "loss": 6.8208, "step": 21500 }, { "epoch": 1.2349338793944846, "grad_norm": 1.5, "learning_rate": 3.371065231904168e-05, "loss": 6.8039, "step": 21510 }, { "epoch": 1.2355080091095247, "grad_norm": 1.5078125, "learning_rate": 3.366670592989803e-05, "loss": 6.7942, "step": 21520 }, { "epoch": 1.236082138824565, "grad_norm": 1.5859375, "learning_rate": 3.362277366120397e-05, "loss": 6.7946, "step": 21530 }, { "epoch": 1.2366562685396054, "grad_norm": 1.625, "learning_rate": 3.357885555093978e-05, "loss": 6.8253, "step": 21540 }, { "epoch": 1.2372303982546458, "grad_norm": 1.546875, "learning_rate": 3.353495163707353e-05, "loss": 6.8448, "step": 21550 }, { "epoch": 1.2378045279696859, "grad_norm": 1.578125, "learning_rate": 3.349106195756101e-05, "loss": 6.8034, "step": 21560 }, { "epoch": 1.2383786576847262, "grad_norm": 1.5703125, "learning_rate": 3.344718655034568e-05, "loss": 6.7819, "step": 21570 }, { "epoch": 1.2389527873997666, "grad_norm": 1.5546875, "learning_rate": 3.340332545335869e-05, "loss": 6.8123, "step": 21580 }, { "epoch": 1.239526917114807, "grad_norm": 1.4921875, "learning_rate": 3.335947870451882e-05, "loss": 6.8522, "step": 21590 }, { "epoch": 1.240101046829847, "grad_norm": 1.6171875, "learning_rate": 3.331564634173243e-05, "loss": 6.8463, "step": 21600 }, { "epoch": 1.2406751765448873, "grad_norm": 1.6328125, "learning_rate": 3.327182840289343e-05, "loss": 6.8039, "step": 21610 }, { "epoch": 1.2412493062599277, "grad_norm": 1.515625, "learning_rate": 3.32280249258833e-05, "loss": 6.8103, "step": 21620 }, { "epoch": 1.241823435974968, "grad_norm": 1.5859375, "learning_rate": 3.3184235948570983e-05, "loss": 6.8569, "step": 21630 }, { "epoch": 1.2423975656900081, "grad_norm": 1.53125, "learning_rate": 3.3140461508812914e-05, "loss": 6.7996, "step": 21640 }, { "epoch": 1.2429716954050485, "grad_norm": 1.5078125, "learning_rate": 3.309670164445292e-05, "loss": 6.8066, "step": 21650 }, { "epoch": 1.2435458251200888, "grad_norm": 1.5546875, "learning_rate": 3.3052956393322287e-05, "loss": 6.7836, "step": 21660 }, { "epoch": 1.2441199548351292, "grad_norm": 1.5546875, "learning_rate": 3.30092257932396e-05, "loss": 6.8482, "step": 21670 }, { "epoch": 1.2446940845501693, "grad_norm": 1.6015625, "learning_rate": 3.296550988201083e-05, "loss": 6.7908, "step": 21680 }, { "epoch": 1.2452682142652096, "grad_norm": 1.5703125, "learning_rate": 3.292180869742924e-05, "loss": 6.774, "step": 21690 }, { "epoch": 1.24584234398025, "grad_norm": 1.546875, "learning_rate": 3.2878122277275313e-05, "loss": 6.803, "step": 21700 }, { "epoch": 1.2464164736952903, "grad_norm": 1.578125, "learning_rate": 3.283445065931685e-05, "loss": 6.7836, "step": 21710 }, { "epoch": 1.2469906034103304, "grad_norm": 1.59375, "learning_rate": 3.279079388130877e-05, "loss": 6.8021, "step": 21720 }, { "epoch": 1.2475647331253708, "grad_norm": 1.6484375, "learning_rate": 3.274715198099324e-05, "loss": 6.7848, "step": 21730 }, { "epoch": 1.248138862840411, "grad_norm": 1.59375, "learning_rate": 3.270352499609952e-05, "loss": 6.8375, "step": 21740 }, { "epoch": 1.2487129925554514, "grad_norm": 1.5546875, "learning_rate": 3.265991296434399e-05, "loss": 6.7839, "step": 21750 }, { "epoch": 1.2492871222704918, "grad_norm": 1.5703125, "learning_rate": 3.261631592343008e-05, "loss": 6.8102, "step": 21760 }, { "epoch": 1.2498612519855319, "grad_norm": 1.625, "learning_rate": 3.257273391104829e-05, "loss": 6.8041, "step": 21770 }, { "epoch": 1.2504353817005722, "grad_norm": 1.6953125, "learning_rate": 3.2529166964876115e-05, "loss": 6.8141, "step": 21780 }, { "epoch": 1.2510095114156126, "grad_norm": 1.5, "learning_rate": 3.248561512257802e-05, "loss": 6.8004, "step": 21790 }, { "epoch": 1.2515836411306527, "grad_norm": 1.5859375, "learning_rate": 3.244207842180542e-05, "loss": 6.7802, "step": 21800 }, { "epoch": 1.252157770845693, "grad_norm": 1.5703125, "learning_rate": 3.2398556900196636e-05, "loss": 6.8308, "step": 21810 }, { "epoch": 1.2527319005607334, "grad_norm": 1.6171875, "learning_rate": 3.235505059537688e-05, "loss": 6.7989, "step": 21820 }, { "epoch": 1.2533060302757737, "grad_norm": 1.5, "learning_rate": 3.2311559544958174e-05, "loss": 6.7923, "step": 21830 }, { "epoch": 1.253880159990814, "grad_norm": 1.5703125, "learning_rate": 3.226808378653938e-05, "loss": 6.8128, "step": 21840 }, { "epoch": 1.2544542897058542, "grad_norm": 1.4765625, "learning_rate": 3.222462335770615e-05, "loss": 6.8132, "step": 21850 }, { "epoch": 1.2550284194208945, "grad_norm": 1.5859375, "learning_rate": 3.218117829603087e-05, "loss": 6.8116, "step": 21860 }, { "epoch": 1.2556025491359348, "grad_norm": 1.6015625, "learning_rate": 3.213774863907262e-05, "loss": 6.7959, "step": 21870 }, { "epoch": 1.256176678850975, "grad_norm": 1.53125, "learning_rate": 3.2094334424377176e-05, "loss": 6.8238, "step": 21880 }, { "epoch": 1.2567508085660153, "grad_norm": 1.5546875, "learning_rate": 3.205093568947699e-05, "loss": 6.7817, "step": 21890 }, { "epoch": 1.2573249382810556, "grad_norm": 1.5390625, "learning_rate": 3.200755247189111e-05, "loss": 6.8276, "step": 21900 }, { "epoch": 1.257899067996096, "grad_norm": 1.5078125, "learning_rate": 3.196418480912515e-05, "loss": 6.7674, "step": 21910 }, { "epoch": 1.2584731977111363, "grad_norm": 1.6015625, "learning_rate": 3.192083273867131e-05, "loss": 6.8002, "step": 21920 }, { "epoch": 1.2590473274261764, "grad_norm": 1.671875, "learning_rate": 3.187749629800829e-05, "loss": 6.7855, "step": 21930 }, { "epoch": 1.2596214571412168, "grad_norm": 1.6328125, "learning_rate": 3.183417552460129e-05, "loss": 6.8001, "step": 21940 }, { "epoch": 1.260195586856257, "grad_norm": 1.515625, "learning_rate": 3.179087045590196e-05, "loss": 6.7425, "step": 21950 }, { "epoch": 1.2607697165712972, "grad_norm": 1.546875, "learning_rate": 3.174758112934836e-05, "loss": 6.7388, "step": 21960 }, { "epoch": 1.2613438462863376, "grad_norm": 1.6171875, "learning_rate": 3.170430758236495e-05, "loss": 6.816, "step": 21970 } ], "logging_steps": 10, "max_steps": 34834, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 999, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.04593957210827e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }